{ "dd_meta_major_version": 1, "dd_meta_minor_version": 4, "state_table_updates": [], "op_list": [ { "name": "/pos_embed/proj/Conv", "type": "SDConv", "in_args": [ "hidden_states_nhwc.out5_0_0" ], "const_args": [ "existing_model.pos_embed.proj.weight" ], "out_args": [ "/pos_embed/Transpose_output_0.out5_0_0" ], "attrs": { "auto_pad": { "type": "str", "value": [ "NOTSET" ] }, "dilations": { "type": "int", "value": [ "1", "1" ] }, "group": { "type": "int", "value": [ "1" ] }, "kernel_shape": { "type": "int", "value": [ "2", "2" ] }, "pads": { "type": "int", "value": [ "0", "0", "0", "0" ] }, "strides": { "type": "int", "value": [ "2", "2" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "w", "h", "16" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "floor(w/2)", "floor(h/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "2", "2", "16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "float" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/pos_embed/Add_2", "type": "SDAdd", "in_args": [ "/pos_embed/Transpose_output_0.out5_0_0", "/pos_embed/Reshape_1_output_0.out_35_1_2" ], "const_args": [], "out_args": [ "/pos_embed/Add_2_output_0.out_35_1_2" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "1", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/timestep_embedder/linear_1/Gemm", "type": "SDGemm", "in_args": [ "/time_text_embed/Cast_output_0.out17_3_3" ], "const_args": [ "existing_model.time_text_embed.timestep_embedder.linear_1.weight_5_1_0" ], "out_args": [ "/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "1", "256" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "256", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/timestep_embedder/act/Sigmoid", "type": "SDSilu", "in_args": [ "/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3" ], "const_args": [ "/time_text_embed/timestep_embedder/act/Sigmoid.weights2_1" ], "out_args": [ "/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "weight_shape": { "type": "int", "value": [ "128" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/timestep_embedder/linear_2/Gemm", "type": "SDGemm", "in_args": [ "/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1" ], "const_args": [ "existing_model.time_text_embed.timestep_embedder.linear_2.weight_5_1_1" ], "out_args": [ "/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/text_embedder/linear_1/Gemm", "type": "SDGemm", "in_args": [ "pooled_projections.out17_3_1" ], "const_args": [ "existing_model.time_text_embed.text_embedder.linear_1.weight_5_1_2" ], "out_args": [ "/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "1", "2048" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "2048", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/text_embedder/act_1/Sigmoid", "type": "SDSilu", "in_args": [ "/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1" ], "const_args": [ "/time_text_embed/text_embedder/act_1/Sigmoid.weights2_0" ], "out_args": [ "/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "weight_shape": { "type": "int", "value": [ "128" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/text_embedder/linear_2/Gemm", "type": "SDGemm", "in_args": [ "/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0" ], "const_args": [ "existing_model.time_text_embed.text_embedder.linear_2.weight_5_1_3" ], "out_args": [ "/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/Add", "type": "SDAdd", "in_args": [ "/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4", "/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2" ], "const_args": [], "out_args": [ "/time_text_embed/Add_output_0.out_35_1_3" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1/silu/Sigmoid", "type": "SDSilu", "in_args": [ "/time_text_embed/Add_output_0.out_35_1_3" ], "const_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid.weights2_2" ], "out_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "weight_shape": { "type": "int", "value": [ "128" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "encoder_hidden_states.out17_3_0_SDCastBf2Bfp", "type": "SDCastBf2Bfp", "in_args": [ "encoder_hidden_states.out17_3_0" ], "const_args": [ "encoder_hidden_states.out17_3_0_bfp.wts" ], "out_args": [ "encoder_hidden_states.out17_3_0_bfp.out25_0" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "4096" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "4096" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/context_embedder/MatMul", "type": "SDGemm_bfp", "in_args": [ "encoder_hidden_states.out17_3_0_bfp.out25_0" ], "const_args": [ "onnx::MatMul_11911" ], "out_args": [ "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "4096" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "4096", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_SDCastBfp2Bf", "type": "SDCastBfp2Bf", "in_args": [ "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0" ], "const_args": [ "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_bfp.wts" ], "out_args": [ "/context_embedder/Add_output_0.out17_3_0" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/context_embedder/Add_output_0.out17_3_0" ], "const_args": [ "/transformer_blocks.0/norm1_context/norm/Constant_output_0", "/transformer_blocks.0/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0" ], "const_args": [ "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_0_existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_1" ], "out_args": [ "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/pos_embed/Add_2_output_0.out_35_1_2" ], "const_args": [ "/transformer_blocks.0/norm1/norm/Constant_output_0", "/transformer_blocks.0/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1" ], "const_args": [ "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_0_existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_1" ], "out_args": [ "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_2_bfp.out1_106" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/Concat_6_16_0", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0", "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_2_bfp.out1_106" ], "const_args": [ "onnx::MatMul_11927_onnx::MatMul_11912" ], "out_args": [ "/transformer_blocks.0/attn/Concat_6_output_0_16_0.out18_1_0_bfp.out19_0" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1/Add_4", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1" ], "const_args": [ "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_3_existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_4" ], "out_args": [ "/transformer_blocks.0/norm1/Add_4_output_0.out0_0_1_bfp.out1_107" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/Concat_7_16_0", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0", "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_2_bfp.out1_106" ], "const_args": [ "onnx::MatMul_11928_onnx::MatMul_11913" ], "out_args": [ "/transformer_blocks.0/attn/Concat_7_16_0.out18_1_0_bfp.out19_1" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0", "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_2_bfp.out1_106" ], "const_args": [ "onnx::MatMul_11929_onnx::MatMul_11914" ], "out_args": [ "/transformer_blocks.0/attn/Concat_8_3d.out18_1_0_bfp.out23_0" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/MatMul_16_0mha_18_0_0", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.0/attn/Concat_6_output_0_16_0.out18_1_0_bfp.out19_0", "/transformer_blocks.0/attn/Concat_7_16_0.out18_1_0_bfp.out19_1", "/transformer_blocks.0/attn/Concat_8_3d.out18_1_0_bfp.out23_0" ], "const_args": [], "out_args": [ "/transformer_blocks.0/attn/Reshape_6_output_0.out18_1_0_bfp.out27_0_0" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.0/attn/Reshape_6_output_0.out18_1_0_bfp.out27_0_0" ], "const_args": [ "onnx::MatMul_11937" ], "out_args": [ "/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_5", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1", "/context_embedder/Add_output_0.out17_3_0" ], "const_args": [ "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_2_gma" ], "out_args": [ "/transformer_blocks.0/Add_5_output_0.out10_0" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.0/Add_5_output_0.out10_0" ], "const_args": [ "/transformer_blocks.0/norm2_context/Constant_output_0", "/transformer_blocks.0/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_7", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3" ], "const_args": [ "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_3_existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_4" ], "out_args": [ "/transformer_blocks.0/Add_7_output_0.out0_0_4_bfp.out1_1" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.0/Add_7_output_0.out0_0_4_bfp.out1_1" ], "const_args": [ "onnx::MatMul_11952" ], "out_args": [ "/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_8" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_8" ], "const_args": [ "onnx::MatMul_11953" ], "out_args": [ "/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_10_bfp.out25_10" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_8", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_10_bfp.out25_10", "/transformer_blocks.0/Add_5_output_0.out10_0" ], "const_args": [ "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_5_gma" ], "out_args": [ "/transformer_blocks.0/Add_8_output_0.out10_1" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.0/attn/Reshape_6_output_0.out18_1_0_bfp.out27_0_0" ], "const_args": [ "onnx::MatMul_11938" ], "out_args": [ "/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0", "/pos_embed/Add_2_output_0.out_35_1_2" ], "const_args": [ "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_2_gma" ], "out_args": [ "/transformer_blocks.0/Add_output_0.out10_104" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.0/Add_8_output_0.out10_1" ], "const_args": [ "/transformer_blocks.1/norm1_context/norm/Constant_output_0", "/transformer_blocks.1/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_4_bfp.out15_4" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn2/to_k/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.0/norm1/Add_4_output_0.out0_0_1_bfp.out1_107" ], "const_args": [ "onnx::MatMul_11940" ], "out_args": [ "/transformer_blocks.0/attn2/Reshape_1_output_0.out17_0_4_bfp.out21_1" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn2/to_q/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.0/norm1/Add_4_output_0.out0_0_1_bfp.out1_107" ], "const_args": [ "onnx::MatMul_11939" ], "out_args": [ "/transformer_blocks.0/attn2/Reshape_output_0.out17_0_5_bfp.out21_0" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn2/to_v/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.0/norm1/Add_4_output_0.out0_0_1_bfp.out1_107" ], "const_args": [ "onnx::MatMul_11941" ], "out_args": [ "/transformer_blocks.0/attn2/to_v/Add_output_0.out17_3_7_bfp.out25_5" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn2/MatMul_17_0mha_18_0_1", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.0/attn2/Reshape_output_0.out17_0_5_bfp.out21_0", "/transformer_blocks.0/attn2/Reshape_1_output_0.out17_0_4_bfp.out21_1", "/transformer_blocks.0/attn2/to_v/Add_output_0.out17_3_7_bfp.out25_5" ], "const_args": [], "out_args": [ "/transformer_blocks.0/attn2/Reshape_3_output_0.out20_0_bfp.out27_0_1" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "64", "state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn2/to_out.0/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.0/attn2/Reshape_3_output_0.out20_0_bfp.out27_0_1" ], "const_args": [ "onnx::MatMul_11949" ], "out_args": [ "/transformer_blocks.0/attn2/to_out.0/Add_output_0.out17_3_8_bfp.out25_6" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_1", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/attn2/to_out.0/Add_output_0.out17_3_8_bfp.out25_6", "/transformer_blocks.0/Add_output_0.out10_104" ], "const_args": [ "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_5_gma" ], "out_args": [ "/transformer_blocks.0/Add_1_output_0.out10_105" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.0/Add_1_output_0.out10_105" ], "const_args": [ "/transformer_blocks.0/norm2/Constant_output_0", "/transformer_blocks.0/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_3", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2" ], "const_args": [ "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_6_existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_7" ], "out_args": [ "/transformer_blocks.0/Add_3_output_0.out0_0_3_bfp.out1_108" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.0/Add_3_output_0.out0_0_3_bfp.out1_108" ], "const_args": [ "onnx::MatMul_11950" ], "out_args": [ "/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_7" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_7" ], "const_args": [ "onnx::MatMul_11951" ], "out_args": [ "/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_9_bfp.out25_9" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_9_bfp.out25_9", "/transformer_blocks.0/Add_1_output_0.out10_105" ], "const_args": [ "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_8_gma" ], "out_args": [ "/transformer_blocks.0/Add_4_output_0.out10_106" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add", "type": "SDAdd", "in_args": [ "/transformer_blocks.0/Add_4_output_0.out10_106", "block_controlnet_hidden_states_0.out_35_1_4" ], "const_args": [], "out_args": [ "/Add_output_0.out_35_1_4" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_output_0.out_35_1_4" ], "const_args": [ "/transformer_blocks.1/norm1/norm/Constant_output_0", "/transformer_blocks.1/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_5_bfp.out15_5" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_5_bfp.out15_5" ], "const_args": [ "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_0_existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_1" ], "out_args": [ "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_7_bfp.out1_2" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm1/Add_4", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_5_bfp.out15_5" ], "const_args": [ "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_3_existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_4" ], "out_args": [ "/transformer_blocks.1/norm1/Add_4_output_0.out0_0_6_bfp.out1_3" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_4_bfp.out15_4" ], "const_args": [ "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_0_existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_1" ], "out_args": [ "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_5" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/Concat_6_16_1", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_5", "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_7_bfp.out1_2" ], "const_args": [ "onnx::MatMul_11969_onnx::MatMul_11954" ], "out_args": [ "/transformer_blocks.1/attn/Concat_6_output_0_16_1.out18_1_1_bfp.out19_2" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/Concat_7_16_1", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_5", "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_7_bfp.out1_2" ], "const_args": [ "onnx::MatMul_11970_onnx::MatMul_11955" ], "out_args": [ "/transformer_blocks.1/attn/Concat_7_16_1.out18_1_1_bfp.out19_3" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_5", "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_7_bfp.out1_2" ], "const_args": [ "onnx::MatMul_11971_onnx::MatMul_11956" ], "out_args": [ "/transformer_blocks.1/attn/Concat_8_3d.out18_1_1_bfp.out23_1" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/MatMul_16_1mha_18_0_2", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.1/attn/Concat_6_output_0_16_1.out18_1_1_bfp.out19_2", "/transformer_blocks.1/attn/Concat_7_16_1.out18_1_1_bfp.out19_3", "/transformer_blocks.1/attn/Concat_8_3d.out18_1_1_bfp.out23_1" ], "const_args": [], "out_args": [ "/transformer_blocks.1/attn/Reshape_6_output_0.out18_1_1_bfp.out27_0_2" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.1/attn/Reshape_6_output_0.out18_1_1_bfp.out27_0_2" ], "const_args": [ "onnx::MatMul_11980" ], "out_args": [ "/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2", "/Add_output_0.out_35_1_4" ], "const_args": [ "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_2_gma" ], "out_args": [ "/transformer_blocks.1/Add_output_0.out10_2" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.1/attn/Reshape_6_output_0.out18_1_1_bfp.out27_0_2" ], "const_args": [ "onnx::MatMul_11979" ], "out_args": [ "/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn2/to_k/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.1/norm1/Add_4_output_0.out0_0_6_bfp.out1_3" ], "const_args": [ "onnx::MatMul_11982" ], "out_args": [ "/transformer_blocks.1/attn2/Reshape_1_output_0.out17_0_10_bfp.out21_3" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_5", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3", "/transformer_blocks.0/Add_8_output_0.out10_1" ], "const_args": [ "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_2_gma" ], "out_args": [ "/transformer_blocks.1/Add_5_output_0.out10_5" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.1/Add_5_output_0.out10_5" ], "const_args": [ "/transformer_blocks.1/norm2_context/Constant_output_0", "/transformer_blocks.1/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_7", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7" ], "const_args": [ "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_3_existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_4" ], "out_args": [ "/transformer_blocks.1/Add_7_output_0.out0_0_9_bfp.out1_6" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.1/Add_7_output_0.out0_0_9_bfp.out1_6" ], "const_args": [ "onnx::MatMul_11994" ], "out_args": [ "/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_14" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_14" ], "const_args": [ "onnx::MatMul_11995" ], "out_args": [ "/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_16_bfp.out25_16" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_8", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_16_bfp.out25_16", "/transformer_blocks.1/Add_5_output_0.out10_5" ], "const_args": [ "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_5_gma" ], "out_args": [ "/transformer_blocks.1/Add_8_output_0.out10_6" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.1/Add_8_output_0.out10_6" ], "const_args": [ "/transformer_blocks.2/norm1_context/norm/Constant_output_0", "/transformer_blocks.2/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_8_bfp.out15_8" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn2/to_q/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.1/norm1/Add_4_output_0.out0_0_6_bfp.out1_3" ], "const_args": [ "onnx::MatMul_11981" ], "out_args": [ "/transformer_blocks.1/attn2/Reshape_output_0.out17_0_11_bfp.out21_2" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn2/to_v/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.1/norm1/Add_4_output_0.out0_0_6_bfp.out1_3" ], "const_args": [ "onnx::MatMul_11983" ], "out_args": [ "/transformer_blocks.1/attn2/to_v/Add_output_0.out17_3_13_bfp.out25_11" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn2/MatMul_17_1mha_18_0_3", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.1/attn2/Reshape_output_0.out17_0_11_bfp.out21_2", "/transformer_blocks.1/attn2/Reshape_1_output_0.out17_0_10_bfp.out21_3", "/transformer_blocks.1/attn2/to_v/Add_output_0.out17_3_13_bfp.out25_11" ], "const_args": [], "out_args": [ "/transformer_blocks.1/attn2/Reshape_3_output_0.out20_1_bfp.out27_0_3" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "64", "state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn2/to_out.0/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.1/attn2/Reshape_3_output_0.out20_1_bfp.out27_0_3" ], "const_args": [ "onnx::MatMul_11991" ], "out_args": [ "/transformer_blocks.1/attn2/to_out.0/Add_output_0.out17_3_14_bfp.out25_12" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_1", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/attn2/to_out.0/Add_output_0.out17_3_14_bfp.out25_12", "/transformer_blocks.1/Add_output_0.out10_2" ], "const_args": [ "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_5_gma" ], "out_args": [ "/transformer_blocks.1/Add_1_output_0.out10_3" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.1/Add_1_output_0.out10_3" ], "const_args": [ "/transformer_blocks.1/norm2/Constant_output_0", "/transformer_blocks.1/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_3", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6" ], "const_args": [ "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_6_existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_7" ], "out_args": [ "/transformer_blocks.1/Add_3_output_0.out0_0_8_bfp.out1_4" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.1/Add_3_output_0.out0_0_8_bfp.out1_4" ], "const_args": [ "onnx::MatMul_11992" ], "out_args": [ "/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_13" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_13" ], "const_args": [ "onnx::MatMul_11993" ], "out_args": [ "/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_15_bfp.out25_15" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_15_bfp.out25_15", "/transformer_blocks.1/Add_1_output_0.out10_3" ], "const_args": [ "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_8_gma" ], "out_args": [ "/transformer_blocks.1/Add_4_output_0.out10_4" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_1", "type": "SDAdd", "in_args": [ "/transformer_blocks.1/Add_4_output_0.out10_4", "block_controlnet_hidden_states_0.out_35_1_4" ], "const_args": [], "out_args": [ "/Add_1_output_0.out_35_1_5" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_1_output_0.out_35_1_5" ], "const_args": [ "/transformer_blocks.2/norm1/norm/Constant_output_0", "/transformer_blocks.2/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_9_bfp.out15_9" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm1/Add_4", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_9_bfp.out15_9" ], "const_args": [ "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_3_existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_4" ], "out_args": [ "/transformer_blocks.2/norm1/Add_4_output_0.out0_0_11_bfp.out1_51" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn2/to_k/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.2/norm1/Add_4_output_0.out0_0_11_bfp.out1_51" ], "const_args": [ "onnx::MatMul_12024" ], "out_args": [ "/transformer_blocks.2/attn2/Reshape_1_output_0.out17_0_16_bfp.out21_5" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn2/to_q/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.2/norm1/Add_4_output_0.out0_0_11_bfp.out1_51" ], "const_args": [ "onnx::MatMul_12023" ], "out_args": [ "/transformer_blocks.2/attn2/Reshape_output_0.out17_0_17_bfp.out21_4" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn2/to_v/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.2/norm1/Add_4_output_0.out0_0_11_bfp.out1_51" ], "const_args": [ "onnx::MatMul_12025" ], "out_args": [ "/transformer_blocks.2/attn2/to_v/Add_output_0.out17_3_19_bfp.out25_17" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn2/MatMul_17_2mha_18_0_5", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.2/attn2/Reshape_output_0.out17_0_17_bfp.out21_4", "/transformer_blocks.2/attn2/Reshape_1_output_0.out17_0_16_bfp.out21_5", "/transformer_blocks.2/attn2/to_v/Add_output_0.out17_3_19_bfp.out25_17" ], "const_args": [], "out_args": [ "/transformer_blocks.2/attn2/Reshape_3_output_0.out20_2_bfp.out27_0_5" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "64", "state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn2/to_out.0/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.2/attn2/Reshape_3_output_0.out20_2_bfp.out27_0_5" ], "const_args": [ "onnx::MatMul_12033" ], "out_args": [ "/transformer_blocks.2/attn2/to_out.0/Add_output_0.out17_3_20_bfp.out25_18" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_9_bfp.out15_9" ], "const_args": [ "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_0_existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_1" ], "out_args": [ "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_12_bfp.out1_50" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_8_bfp.out15_8" ], "const_args": [ "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_0_existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_1" ], "out_args": [ "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_10_bfp.out1_53" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/Concat_6_16_2", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_10_bfp.out1_53", "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_12_bfp.out1_50" ], "const_args": [ "onnx::MatMul_12011_onnx::MatMul_11996" ], "out_args": [ "/transformer_blocks.2/attn/Concat_6_output_0_16_2.out18_1_2_bfp.out19_4" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/Concat_7_16_2", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_10_bfp.out1_53", "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_12_bfp.out1_50" ], "const_args": [ "onnx::MatMul_12012_onnx::MatMul_11997" ], "out_args": [ "/transformer_blocks.2/attn/Concat_7_16_2.out18_1_2_bfp.out19_5" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_10_bfp.out1_53", "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_12_bfp.out1_50" ], "const_args": [ "onnx::MatMul_12013_onnx::MatMul_11998" ], "out_args": [ "/transformer_blocks.2/attn/Concat_8_3d.out18_1_2_bfp.out23_2" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/MatMul_16_2mha_18_0_4", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.2/attn/Concat_6_output_0_16_2.out18_1_2_bfp.out19_4", "/transformer_blocks.2/attn/Concat_7_16_2.out18_1_2_bfp.out19_5", "/transformer_blocks.2/attn/Concat_8_3d.out18_1_2_bfp.out23_2" ], "const_args": [], "out_args": [ "/transformer_blocks.2/attn/Reshape_6_output_0.out18_1_2_bfp.out27_0_4" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.2/attn/Reshape_6_output_0.out18_1_2_bfp.out27_0_4" ], "const_args": [ "onnx::MatMul_12021" ], "out_args": [ "/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_5", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5", "/transformer_blocks.1/Add_8_output_0.out10_6" ], "const_args": [ "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_2_gma" ], "out_args": [ "/transformer_blocks.2/Add_5_output_0.out10_53" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.2/attn/Reshape_6_output_0.out18_1_2_bfp.out27_0_4" ], "const_args": [ "onnx::MatMul_12022" ], "out_args": [ "/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4", "/Add_1_output_0.out_35_1_5" ], "const_args": [ "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_2_gma" ], "out_args": [ "/transformer_blocks.2/Add_output_0.out10_50" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_1", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/attn2/to_out.0/Add_output_0.out17_3_20_bfp.out25_18", "/transformer_blocks.2/Add_output_0.out10_50" ], "const_args": [ "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_5_gma" ], "out_args": [ "/transformer_blocks.2/Add_1_output_0.out10_51" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.2/Add_1_output_0.out10_51" ], "const_args": [ "/transformer_blocks.2/norm2/Constant_output_0", "/transformer_blocks.2/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_3", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10" ], "const_args": [ "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_6_existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_7" ], "out_args": [ "/transformer_blocks.2/Add_3_output_0.out0_0_13_bfp.out1_52" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.2/Add_3_output_0.out0_0_13_bfp.out1_52" ], "const_args": [ "onnx::MatMul_12034" ], "out_args": [ "/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_19" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_19" ], "const_args": [ "onnx::MatMul_12035" ], "out_args": [ "/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_21_bfp.out25_21" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.2/Add_5_output_0.out10_53" ], "const_args": [ "/transformer_blocks.2/norm2_context/Constant_output_0", "/transformer_blocks.2/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_7", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11" ], "const_args": [ "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_3_existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_4" ], "out_args": [ "/transformer_blocks.2/Add_7_output_0.out0_0_14_bfp.out1_54" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.2/Add_7_output_0.out0_0_14_bfp.out1_54" ], "const_args": [ "onnx::MatMul_12036" ], "out_args": [ "/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_20" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_20" ], "const_args": [ "onnx::MatMul_12037" ], "out_args": [ "/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_22_bfp.out25_22" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_8", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_22_bfp.out25_22", "/transformer_blocks.2/Add_5_output_0.out10_53" ], "const_args": [ "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_5_gma" ], "out_args": [ "/transformer_blocks.2/Add_8_output_0.out10_54" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.2/Add_8_output_0.out10_54" ], "const_args": [ "/transformer_blocks.3/norm1_context/norm/Constant_output_0", "/transformer_blocks.3/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_12_bfp.out15_12" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_21_bfp.out25_21", "/transformer_blocks.2/Add_1_output_0.out10_51" ], "const_args": [ "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_8_gma" ], "out_args": [ "/transformer_blocks.2/Add_4_output_0.out10_52" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_2", "type": "SDAdd", "in_args": [ "/transformer_blocks.2/Add_4_output_0.out10_52", "block_controlnet_hidden_states_1.out_35_1_6" ], "const_args": [], "out_args": [ "/Add_2_output_0.out_35_1_6" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_2_output_0.out_35_1_6" ], "const_args": [ "/transformer_blocks.3/norm1/norm/Constant_output_0", "/transformer_blocks.3/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_13_bfp.out15_13" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm1/Add_4", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_13_bfp.out15_13" ], "const_args": [ "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_3_existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_4" ], "out_args": [ "/transformer_blocks.3/norm1/Add_4_output_0.out0_0_16_bfp.out1_71" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn2/to_k/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.3/norm1/Add_4_output_0.out0_0_16_bfp.out1_71" ], "const_args": [ "onnx::MatMul_12066" ], "out_args": [ "/transformer_blocks.3/attn2/Reshape_1_output_0.out17_0_22_bfp.out21_7" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn2/to_q/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.3/norm1/Add_4_output_0.out0_0_16_bfp.out1_71" ], "const_args": [ "onnx::MatMul_12065" ], "out_args": [ "/transformer_blocks.3/attn2/Reshape_output_0.out17_0_23_bfp.out21_6" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn2/to_v/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.3/norm1/Add_4_output_0.out0_0_16_bfp.out1_71" ], "const_args": [ "onnx::MatMul_12067" ], "out_args": [ "/transformer_blocks.3/attn2/to_v/Add_output_0.out17_3_25_bfp.out25_23" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn2/MatMul_17_3mha_18_0_7", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.3/attn2/Reshape_output_0.out17_0_23_bfp.out21_6", "/transformer_blocks.3/attn2/Reshape_1_output_0.out17_0_22_bfp.out21_7", "/transformer_blocks.3/attn2/to_v/Add_output_0.out17_3_25_bfp.out25_23" ], "const_args": [], "out_args": [ "/transformer_blocks.3/attn2/Reshape_3_output_0.out20_3_bfp.out27_0_7" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "64", "state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn2/to_out.0/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.3/attn2/Reshape_3_output_0.out20_3_bfp.out27_0_7" ], "const_args": [ "onnx::MatMul_12075" ], "out_args": [ "/transformer_blocks.3/attn2/to_out.0/Add_output_0.out17_3_26_bfp.out25_24" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_13_bfp.out15_13" ], "const_args": [ "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_0_existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_1" ], "out_args": [ "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_17_bfp.out1_70" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_12_bfp.out15_12" ], "const_args": [ "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_0_existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_1" ], "out_args": [ "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_15_bfp.out1_73" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/Concat_6_16_3", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_15_bfp.out1_73", "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_17_bfp.out1_70" ], "const_args": [ "onnx::MatMul_12053_onnx::MatMul_12038" ], "out_args": [ "/transformer_blocks.3/attn/Concat_6_output_0_16_3.out18_1_3_bfp.out19_6" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/Concat_7_16_3", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_15_bfp.out1_73", "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_17_bfp.out1_70" ], "const_args": [ "onnx::MatMul_12054_onnx::MatMul_12039" ], "out_args": [ "/transformer_blocks.3/attn/Concat_7_16_3.out18_1_3_bfp.out19_7" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_15_bfp.out1_73", "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_17_bfp.out1_70" ], "const_args": [ "onnx::MatMul_12055_onnx::MatMul_12040" ], "out_args": [ "/transformer_blocks.3/attn/Concat_8_3d.out18_1_3_bfp.out23_3" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/MatMul_16_3mha_18_0_6", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.3/attn/Concat_6_output_0_16_3.out18_1_3_bfp.out19_6", "/transformer_blocks.3/attn/Concat_7_16_3.out18_1_3_bfp.out19_7", "/transformer_blocks.3/attn/Concat_8_3d.out18_1_3_bfp.out23_3" ], "const_args": [], "out_args": [ "/transformer_blocks.3/attn/Reshape_6_output_0.out18_1_3_bfp.out27_0_6" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.3/attn/Reshape_6_output_0.out18_1_3_bfp.out27_0_6" ], "const_args": [ "onnx::MatMul_12063" ], "out_args": [ "/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_5", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7", "/transformer_blocks.2/Add_8_output_0.out10_54" ], "const_args": [ "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_2_gma" ], "out_args": [ "/transformer_blocks.3/Add_5_output_0.out10_72" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.3/attn/Reshape_6_output_0.out18_1_3_bfp.out27_0_6" ], "const_args": [ "onnx::MatMul_12064" ], "out_args": [ "/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6", "/Add_2_output_0.out_35_1_6" ], "const_args": [ "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_2_gma" ], "out_args": [ "/transformer_blocks.3/Add_output_0.out10_69" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_1", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/attn2/to_out.0/Add_output_0.out17_3_26_bfp.out25_24", "/transformer_blocks.3/Add_output_0.out10_69" ], "const_args": [ "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_5_gma" ], "out_args": [ "/transformer_blocks.3/Add_1_output_0.out10_70" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.3/Add_1_output_0.out10_70" ], "const_args": [ "/transformer_blocks.3/norm2/Constant_output_0", "/transformer_blocks.3/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_3", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14" ], "const_args": [ "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_6_existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_7" ], "out_args": [ "/transformer_blocks.3/Add_3_output_0.out0_0_18_bfp.out1_72" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.3/Add_3_output_0.out0_0_18_bfp.out1_72" ], "const_args": [ "onnx::MatMul_12076" ], "out_args": [ "/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_25" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_25" ], "const_args": [ "onnx::MatMul_12077" ], "out_args": [ "/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_27_bfp.out25_27" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_27_bfp.out25_27", "/transformer_blocks.3/Add_1_output_0.out10_70" ], "const_args": [ "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_8_gma" ], "out_args": [ "/transformer_blocks.3/Add_4_output_0.out10_71" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_3", "type": "SDAdd", "in_args": [ "/transformer_blocks.3/Add_4_output_0.out10_71", "block_controlnet_hidden_states_1.out_35_1_6" ], "const_args": [], "out_args": [ "/Add_3_output_0.out_35_1_7" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_3_output_0.out_35_1_7" ], "const_args": [ "/transformer_blocks.4/norm1/norm/Constant_output_0", "/transformer_blocks.4/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_17_bfp.out15_17" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm1/Add_4", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_17_bfp.out15_17" ], "const_args": [ "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_3_existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_4" ], "out_args": [ "/transformer_blocks.4/norm1/Add_4_output_0.out0_0_21_bfp.out1_76" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.3/Add_5_output_0.out10_72" ], "const_args": [ "/transformer_blocks.3/norm2_context/Constant_output_0", "/transformer_blocks.3/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_7", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15" ], "const_args": [ "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_3_existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_4" ], "out_args": [ "/transformer_blocks.3/Add_7_output_0.out0_0_19_bfp.out1_74" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.3/Add_7_output_0.out0_0_19_bfp.out1_74" ], "const_args": [ "onnx::MatMul_12078" ], "out_args": [ "/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_26" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_26" ], "const_args": [ "onnx::MatMul_12079" ], "out_args": [ "/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_28_bfp.out25_28" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_8", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_28_bfp.out25_28", "/transformer_blocks.3/Add_5_output_0.out10_72" ], "const_args": [ "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_5_gma" ], "out_args": [ "/transformer_blocks.3/Add_8_output_0.out10_73" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.3/Add_8_output_0.out10_73" ], "const_args": [ "/transformer_blocks.4/norm1_context/norm/Constant_output_0", "/transformer_blocks.4/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_16_bfp.out15_16" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn2/to_k/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.4/norm1/Add_4_output_0.out0_0_21_bfp.out1_76" ], "const_args": [ "onnx::MatMul_12108" ], "out_args": [ "/transformer_blocks.4/attn2/Reshape_1_output_0.out17_0_28_bfp.out21_9" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn2/to_q/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.4/norm1/Add_4_output_0.out0_0_21_bfp.out1_76" ], "const_args": [ "onnx::MatMul_12107" ], "out_args": [ "/transformer_blocks.4/attn2/Reshape_output_0.out17_0_29_bfp.out21_8" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn2/to_v/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.4/norm1/Add_4_output_0.out0_0_21_bfp.out1_76" ], "const_args": [ "onnx::MatMul_12109" ], "out_args": [ "/transformer_blocks.4/attn2/to_v/Add_output_0.out17_3_31_bfp.out25_29" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn2/MatMul_17_4mha_18_0_9", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.4/attn2/Reshape_output_0.out17_0_29_bfp.out21_8", "/transformer_blocks.4/attn2/Reshape_1_output_0.out17_0_28_bfp.out21_9", "/transformer_blocks.4/attn2/to_v/Add_output_0.out17_3_31_bfp.out25_29" ], "const_args": [], "out_args": [ "/transformer_blocks.4/attn2/Reshape_3_output_0.out20_4_bfp.out27_0_9" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "64", "state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn2/to_out.0/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.4/attn2/Reshape_3_output_0.out20_4_bfp.out27_0_9" ], "const_args": [ "onnx::MatMul_12117" ], "out_args": [ "/transformer_blocks.4/attn2/to_out.0/Add_output_0.out17_3_32_bfp.out25_30" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_17_bfp.out15_17" ], "const_args": [ "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_0_existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_1" ], "out_args": [ "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_22_bfp.out1_75" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_16_bfp.out15_16" ], "const_args": [ "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_0_existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_1" ], "out_args": [ "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_20_bfp.out1_78" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/Concat_6_16_4", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_20_bfp.out1_78", "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_22_bfp.out1_75" ], "const_args": [ "onnx::MatMul_12095_onnx::MatMul_12080" ], "out_args": [ "/transformer_blocks.4/attn/Concat_6_output_0_16_4.out18_1_4_bfp.out19_8" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/Concat_7_16_4", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_20_bfp.out1_78", "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_22_bfp.out1_75" ], "const_args": [ "onnx::MatMul_12096_onnx::MatMul_12081" ], "out_args": [ "/transformer_blocks.4/attn/Concat_7_16_4.out18_1_4_bfp.out19_9" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_20_bfp.out1_78", "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_22_bfp.out1_75" ], "const_args": [ "onnx::MatMul_12097_onnx::MatMul_12082" ], "out_args": [ "/transformer_blocks.4/attn/Concat_8_3d.out18_1_4_bfp.out23_4" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/MatMul_16_4mha_18_0_8", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.4/attn/Concat_6_output_0_16_4.out18_1_4_bfp.out19_8", "/transformer_blocks.4/attn/Concat_7_16_4.out18_1_4_bfp.out19_9", "/transformer_blocks.4/attn/Concat_8_3d.out18_1_4_bfp.out23_4" ], "const_args": [], "out_args": [ "/transformer_blocks.4/attn/Reshape_6_output_0.out18_1_4_bfp.out27_0_8" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.4/attn/Reshape_6_output_0.out18_1_4_bfp.out27_0_8" ], "const_args": [ "onnx::MatMul_12105" ], "out_args": [ "/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_5", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9", "/transformer_blocks.3/Add_8_output_0.out10_73" ], "const_args": [ "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_2_gma" ], "out_args": [ "/transformer_blocks.4/Add_5_output_0.out10_77" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.4/attn/Reshape_6_output_0.out18_1_4_bfp.out27_0_8" ], "const_args": [ "onnx::MatMul_12106" ], "out_args": [ "/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8", "/Add_3_output_0.out_35_1_7" ], "const_args": [ "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_2_gma" ], "out_args": [ "/transformer_blocks.4/Add_output_0.out10_74" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_1", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/attn2/to_out.0/Add_output_0.out17_3_32_bfp.out25_30", "/transformer_blocks.4/Add_output_0.out10_74" ], "const_args": [ "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_5_gma" ], "out_args": [ "/transformer_blocks.4/Add_1_output_0.out10_75" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.4/Add_1_output_0.out10_75" ], "const_args": [ "/transformer_blocks.4/norm2/Constant_output_0", "/transformer_blocks.4/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_3", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18" ], "const_args": [ "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_6_existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_7" ], "out_args": [ "/transformer_blocks.4/Add_3_output_0.out0_0_23_bfp.out1_77" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.4/Add_3_output_0.out0_0_23_bfp.out1_77" ], "const_args": [ "onnx::MatMul_12118" ], "out_args": [ "/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_31" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_31" ], "const_args": [ "onnx::MatMul_12119" ], "out_args": [ "/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_33_bfp.out25_33" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.4/Add_5_output_0.out10_77" ], "const_args": [ "/transformer_blocks.4/norm2_context/Constant_output_0", "/transformer_blocks.4/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_7", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19" ], "const_args": [ "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_3_existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_4" ], "out_args": [ "/transformer_blocks.4/Add_7_output_0.out0_0_24_bfp.out1_79" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.4/Add_7_output_0.out0_0_24_bfp.out1_79" ], "const_args": [ "onnx::MatMul_12120" ], "out_args": [ "/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_32" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_32" ], "const_args": [ "onnx::MatMul_12121" ], "out_args": [ "/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_34_bfp.out25_34" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_8", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_34_bfp.out25_34", "/transformer_blocks.4/Add_5_output_0.out10_77" ], "const_args": [ "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_5_gma" ], "out_args": [ "/transformer_blocks.4/Add_8_output_0.out10_78" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.4/Add_8_output_0.out10_78" ], "const_args": [ "/transformer_blocks.5/norm1_context/norm/Constant_output_0", "/transformer_blocks.5/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_20_bfp.out15_20" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_33_bfp.out25_33", "/transformer_blocks.4/Add_1_output_0.out10_75" ], "const_args": [ "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_8_gma" ], "out_args": [ "/transformer_blocks.4/Add_4_output_0.out10_76" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_4", "type": "SDAdd", "in_args": [ "/transformer_blocks.4/Add_4_output_0.out10_76", "block_controlnet_hidden_states_2.out_35_1_8" ], "const_args": [], "out_args": [ "/Add_4_output_0.out_35_1_8" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_4_output_0.out_35_1_8" ], "const_args": [ "/transformer_blocks.5/norm1/norm/Constant_output_0", "/transformer_blocks.5/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_21_bfp.out15_21" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm1/Add_4", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_21_bfp.out15_21" ], "const_args": [ "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_3_existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_4" ], "out_args": [ "/transformer_blocks.5/norm1/Add_4_output_0.out0_0_26_bfp.out1_81" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn2/to_k/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.5/norm1/Add_4_output_0.out0_0_26_bfp.out1_81" ], "const_args": [ "onnx::MatMul_12150" ], "out_args": [ "/transformer_blocks.5/attn2/Reshape_1_output_0.out17_0_34_bfp.out21_11" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn2/to_q/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.5/norm1/Add_4_output_0.out0_0_26_bfp.out1_81" ], "const_args": [ "onnx::MatMul_12149" ], "out_args": [ "/transformer_blocks.5/attn2/Reshape_output_0.out17_0_35_bfp.out21_10" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn2/to_v/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.5/norm1/Add_4_output_0.out0_0_26_bfp.out1_81" ], "const_args": [ "onnx::MatMul_12151" ], "out_args": [ "/transformer_blocks.5/attn2/to_v/Add_output_0.out17_3_37_bfp.out25_35" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn2/MatMul_17_5mha_18_0_11", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.5/attn2/Reshape_output_0.out17_0_35_bfp.out21_10", "/transformer_blocks.5/attn2/Reshape_1_output_0.out17_0_34_bfp.out21_11", "/transformer_blocks.5/attn2/to_v/Add_output_0.out17_3_37_bfp.out25_35" ], "const_args": [], "out_args": [ "/transformer_blocks.5/attn2/Reshape_3_output_0.out20_5_bfp.out27_0_11" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "64", "state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn2/to_out.0/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.5/attn2/Reshape_3_output_0.out20_5_bfp.out27_0_11" ], "const_args": [ "onnx::MatMul_12159" ], "out_args": [ "/transformer_blocks.5/attn2/to_out.0/Add_output_0.out17_3_38_bfp.out25_36" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_21_bfp.out15_21" ], "const_args": [ "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_0_existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_1" ], "out_args": [ "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_27_bfp.out1_80" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_20_bfp.out15_20" ], "const_args": [ "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_0_existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_1" ], "out_args": [ "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_25_bfp.out1_83" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/Concat_6_16_5", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_25_bfp.out1_83", "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_27_bfp.out1_80" ], "const_args": [ "onnx::MatMul_12137_onnx::MatMul_12122" ], "out_args": [ "/transformer_blocks.5/attn/Concat_6_output_0_16_5.out18_1_5_bfp.out19_10" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/Concat_7_16_5", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_25_bfp.out1_83", "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_27_bfp.out1_80" ], "const_args": [ "onnx::MatMul_12138_onnx::MatMul_12123" ], "out_args": [ "/transformer_blocks.5/attn/Concat_7_16_5.out18_1_5_bfp.out19_11" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_25_bfp.out1_83", "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_27_bfp.out1_80" ], "const_args": [ "onnx::MatMul_12139_onnx::MatMul_12124" ], "out_args": [ "/transformer_blocks.5/attn/Concat_8_3d.out18_1_5_bfp.out23_5" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/MatMul_16_5mha_18_0_10", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.5/attn/Concat_6_output_0_16_5.out18_1_5_bfp.out19_10", "/transformer_blocks.5/attn/Concat_7_16_5.out18_1_5_bfp.out19_11", "/transformer_blocks.5/attn/Concat_8_3d.out18_1_5_bfp.out23_5" ], "const_args": [], "out_args": [ "/transformer_blocks.5/attn/Reshape_6_output_0.out18_1_5_bfp.out27_0_10" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.5/attn/Reshape_6_output_0.out18_1_5_bfp.out27_0_10" ], "const_args": [ "onnx::MatMul_12147" ], "out_args": [ "/transformer_blocks.5/attn/to_add_out/Add_output_0.out6_1_11_bfp.out7_11" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add_5", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/attn/to_add_out/Add_output_0.out6_1_11_bfp.out7_11", "/transformer_blocks.4/Add_8_output_0.out10_78" ], "const_args": [ "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_2_gma" ], "out_args": [ "/transformer_blocks.5/Add_5_output_0.out10_82" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.5/attn/Reshape_6_output_0.out18_1_5_bfp.out27_0_10" ], "const_args": [ "onnx::MatMul_12148" ], "out_args": [ "/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10", "/Add_4_output_0.out_35_1_8" ], "const_args": [ "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_2_gma" ], "out_args": [ "/transformer_blocks.5/Add_output_0.out10_79" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add_1", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/attn2/to_out.0/Add_output_0.out17_3_38_bfp.out25_36", "/transformer_blocks.5/Add_output_0.out10_79" ], "const_args": [ "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_5_gma" ], "out_args": [ "/transformer_blocks.5/Add_1_output_0.out10_80" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.5/Add_1_output_0.out10_80" ], "const_args": [ "/transformer_blocks.5/norm2/Constant_output_0", "/transformer_blocks.5/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add_3", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22" ], "const_args": [ "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_6_existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_7" ], "out_args": [ "/transformer_blocks.5/Add_3_output_0.out0_0_28_bfp.out1_82" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.5/Add_3_output_0.out0_0_28_bfp.out1_82" ], "const_args": [ "onnx::MatMul_12160" ], "out_args": [ "/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_37" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_37" ], "const_args": [ "onnx::MatMul_12161" ], "out_args": [ "/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_39_bfp.out25_39" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_39_bfp.out25_39", "/transformer_blocks.5/Add_1_output_0.out10_80" ], "const_args": [ "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_8_gma" ], "out_args": [ "/transformer_blocks.5/Add_4_output_0.out10_81" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_5", "type": "SDAdd", "in_args": [ "/transformer_blocks.5/Add_4_output_0.out10_81", "block_controlnet_hidden_states_2.out_35_1_8" ], "const_args": [], "out_args": [ "/Add_5_output_0.out_35_1_9" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_5_output_0.out_35_1_9" ], "const_args": [ "/transformer_blocks.6/norm1/norm/Constant_output_0", "/transformer_blocks.6/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.6/norm1/norm/LayerNormalization_output_0.out14_25_bfp.out15_25" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/norm1/Add_4", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/norm1/norm/LayerNormalization_output_0.out14_25_bfp.out15_25" ], "const_args": [ "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_3_existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_4" ], "out_args": [ "/transformer_blocks.6/norm1/Add_4_output_0.out0_0_31_bfp.out1_86" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.5/Add_5_output_0.out10_82" ], "const_args": [ "/transformer_blocks.5/norm2_context/Constant_output_0", "/transformer_blocks.5/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.5/norm2_context/LayerNormalization_output_0.out14_23_bfp.out15_23" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add_7", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/norm2_context/LayerNormalization_output_0.out14_23_bfp.out15_23" ], "const_args": [ "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_3_existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_4" ], "out_args": [ "/transformer_blocks.5/Add_7_output_0.out0_0_29_bfp.out1_84" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.5/Add_7_output_0.out0_0_29_bfp.out1_84" ], "const_args": [ "onnx::MatMul_12162" ], "out_args": [ "/transformer_blocks.5/ff_context/net.0/Mul_5_output_0.out17_2_11_bfp.out25_38" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.5/ff_context/net.0/Mul_5_output_0.out17_2_11_bfp.out25_38" ], "const_args": [ "onnx::MatMul_12163" ], "out_args": [ "/transformer_blocks.5/ff_context/net.2/Add_output_0.out17_3_40_bfp.out25_40" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add_8", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/ff_context/net.2/Add_output_0.out17_3_40_bfp.out25_40", "/transformer_blocks.5/Add_5_output_0.out10_82" ], "const_args": [ "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_5_gma" ], "out_args": [ "/transformer_blocks.5/Add_8_output_0.out10_83" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.5/Add_8_output_0.out10_83" ], "const_args": [ "/transformer_blocks.6/norm1_context/norm/Constant_output_0", "/transformer_blocks.6/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.6/norm1_context/norm/LayerNormalization_output_0.out14_24_bfp.out15_24" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn2/to_k/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.6/norm1/Add_4_output_0.out0_0_31_bfp.out1_86" ], "const_args": [ "onnx::MatMul_12192" ], "out_args": [ "/transformer_blocks.6/attn2/Reshape_1_output_0.out17_0_40_bfp.out21_13" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn2/to_q/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.6/norm1/Add_4_output_0.out0_0_31_bfp.out1_86" ], "const_args": [ "onnx::MatMul_12191" ], "out_args": [ "/transformer_blocks.6/attn2/Reshape_output_0.out17_0_41_bfp.out21_12" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn2/to_v/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.6/norm1/Add_4_output_0.out0_0_31_bfp.out1_86" ], "const_args": [ "onnx::MatMul_12193" ], "out_args": [ "/transformer_blocks.6/attn2/to_v/Add_output_0.out17_3_43_bfp.out25_41" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn2/MatMul_17_6mha_18_0_13", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.6/attn2/Reshape_output_0.out17_0_41_bfp.out21_12", "/transformer_blocks.6/attn2/Reshape_1_output_0.out17_0_40_bfp.out21_13", "/transformer_blocks.6/attn2/to_v/Add_output_0.out17_3_43_bfp.out25_41" ], "const_args": [], "out_args": [ "/transformer_blocks.6/attn2/Reshape_3_output_0.out20_6_bfp.out27_0_13" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "64", "state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn2/to_out.0/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.6/attn2/Reshape_3_output_0.out20_6_bfp.out27_0_13" ], "const_args": [ "onnx::MatMul_12201" ], "out_args": [ "/transformer_blocks.6/attn2/to_out.0/Add_output_0.out17_3_44_bfp.out25_42" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/norm1/norm/LayerNormalization_output_0.out14_25_bfp.out15_25" ], "const_args": [ "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_0_existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_1" ], "out_args": [ "/transformer_blocks.6/norm1/Add_2_output_0.out0_0_32_bfp.out1_85" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/norm1_context/norm/LayerNormalization_output_0.out14_24_bfp.out15_24" ], "const_args": [ "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_0_existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_1" ], "out_args": [ "/transformer_blocks.6/norm1_context/Add_2_output_0.out0_0_30_bfp.out1_88" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn/Concat_6_16_6", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.6/norm1_context/Add_2_output_0.out0_0_30_bfp.out1_88", "/transformer_blocks.6/norm1/Add_2_output_0.out0_0_32_bfp.out1_85" ], "const_args": [ "onnx::MatMul_12179_onnx::MatMul_12164" ], "out_args": [ "/transformer_blocks.6/attn/Concat_6_output_0_16_6.out18_1_6_bfp.out19_12" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn/Concat_7_16_6", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.6/norm1_context/Add_2_output_0.out0_0_30_bfp.out1_88", "/transformer_blocks.6/norm1/Add_2_output_0.out0_0_32_bfp.out1_85" ], "const_args": [ "onnx::MatMul_12180_onnx::MatMul_12165" ], "out_args": [ "/transformer_blocks.6/attn/Concat_7_16_6.out18_1_6_bfp.out19_13" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.6/norm1_context/Add_2_output_0.out0_0_30_bfp.out1_88", "/transformer_blocks.6/norm1/Add_2_output_0.out0_0_32_bfp.out1_85" ], "const_args": [ "onnx::MatMul_12181_onnx::MatMul_12166" ], "out_args": [ "/transformer_blocks.6/attn/Concat_8_3d.out18_1_6_bfp.out23_6" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn/MatMul_16_6mha_18_0_12", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.6/attn/Concat_6_output_0_16_6.out18_1_6_bfp.out19_12", "/transformer_blocks.6/attn/Concat_7_16_6.out18_1_6_bfp.out19_13", "/transformer_blocks.6/attn/Concat_8_3d.out18_1_6_bfp.out23_6" ], "const_args": [], "out_args": [ "/transformer_blocks.6/attn/Reshape_6_output_0.out18_1_6_bfp.out27_0_12" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.6/attn/Reshape_6_output_0.out18_1_6_bfp.out27_0_12" ], "const_args": [ "onnx::MatMul_12189" ], "out_args": [ "/transformer_blocks.6/attn/to_add_out/Add_output_0.out6_1_13_bfp.out7_13" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/Add_5", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/attn/to_add_out/Add_output_0.out6_1_13_bfp.out7_13", "/transformer_blocks.5/Add_8_output_0.out10_83" ], "const_args": [ "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_2_gma" ], "out_args": [ "/transformer_blocks.6/Add_5_output_0.out10_87" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.6/attn/Reshape_6_output_0.out18_1_6_bfp.out27_0_12" ], "const_args": [ "onnx::MatMul_12190" ], "out_args": [ "/transformer_blocks.6/attn/to_out.0/Add_output_0.out6_1_12_bfp.out7_12" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/attn/to_out.0/Add_output_0.out6_1_12_bfp.out7_12", "/Add_5_output_0.out_35_1_9" ], "const_args": [ "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_2_gma" ], "out_args": [ "/transformer_blocks.6/Add_output_0.out10_84" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/Add_1", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/attn2/to_out.0/Add_output_0.out17_3_44_bfp.out25_42", "/transformer_blocks.6/Add_output_0.out10_84" ], "const_args": [ "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_5_gma" ], "out_args": [ "/transformer_blocks.6/Add_1_output_0.out10_85" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.6/Add_1_output_0.out10_85" ], "const_args": [ "/transformer_blocks.6/norm2/Constant_output_0", "/transformer_blocks.6/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.6/norm2/LayerNormalization_output_0.out14_26_bfp.out15_26" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/Add_3", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/norm2/LayerNormalization_output_0.out14_26_bfp.out15_26" ], "const_args": [ "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_6_existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_7" ], "out_args": [ "/transformer_blocks.6/Add_3_output_0.out0_0_33_bfp.out1_87" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.6/Add_3_output_0.out0_0_33_bfp.out1_87" ], "const_args": [ "onnx::MatMul_12202" ], "out_args": [ "/transformer_blocks.6/ff/net.0/Mul_5_output_0.out17_2_12_bfp.out25_43" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.6/ff/net.0/Mul_5_output_0.out17_2_12_bfp.out25_43" ], "const_args": [ "onnx::MatMul_12203" ], "out_args": [ "/transformer_blocks.6/ff/net.2/Add_output_0.out17_3_45_bfp.out25_45" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.6/Add_5_output_0.out10_87" ], "const_args": [ "/transformer_blocks.6/norm2_context/Constant_output_0", "/transformer_blocks.6/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.6/norm2_context/LayerNormalization_output_0.out14_27_bfp.out15_27" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/Add_7", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/norm2_context/LayerNormalization_output_0.out14_27_bfp.out15_27" ], "const_args": [ "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_3_existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_4" ], "out_args": [ "/transformer_blocks.6/Add_7_output_0.out0_0_34_bfp.out1_89" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.6/Add_7_output_0.out0_0_34_bfp.out1_89" ], "const_args": [ "onnx::MatMul_12204" ], "out_args": [ "/transformer_blocks.6/ff_context/net.0/Mul_5_output_0.out17_2_13_bfp.out25_44" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.6/ff_context/net.0/Mul_5_output_0.out17_2_13_bfp.out25_44" ], "const_args": [ "onnx::MatMul_12205" ], "out_args": [ "/transformer_blocks.6/ff_context/net.2/Add_output_0.out17_3_46_bfp.out25_46" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/Add_8", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/ff_context/net.2/Add_output_0.out17_3_46_bfp.out25_46", "/transformer_blocks.6/Add_5_output_0.out10_87" ], "const_args": [ "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_5_gma" ], "out_args": [ "/transformer_blocks.6/Add_8_output_0.out10_88" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.6/Add_8_output_0.out10_88" ], "const_args": [ "/transformer_blocks.7/norm1_context/norm/Constant_output_0", "/transformer_blocks.7/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.7/norm1_context/norm/LayerNormalization_output_0.out14_28_bfp.out15_28" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/ff/net.2/Add_output_0.out17_3_45_bfp.out25_45", "/transformer_blocks.6/Add_1_output_0.out10_85" ], "const_args": [ "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_8_gma" ], "out_args": [ "/transformer_blocks.6/Add_4_output_0.out10_86" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_6", "type": "SDAdd", "in_args": [ "/transformer_blocks.6/Add_4_output_0.out10_86", "block_controlnet_hidden_states_3.out_35_1_10" ], "const_args": [], "out_args": [ "/Add_6_output_0.out_35_1_10" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_6_output_0.out_35_1_10" ], "const_args": [ "/transformer_blocks.7/norm1/norm/Constant_output_0", "/transformer_blocks.7/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.7/norm1/norm/LayerNormalization_output_0.out14_29_bfp.out15_29" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/norm1/Add_4", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/norm1/norm/LayerNormalization_output_0.out14_29_bfp.out15_29" ], "const_args": [ "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_3_existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_4" ], "out_args": [ "/transformer_blocks.7/norm1/Add_4_output_0.out0_0_36_bfp.out1_91" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn2/to_k/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.7/norm1/Add_4_output_0.out0_0_36_bfp.out1_91" ], "const_args": [ "onnx::MatMul_12234" ], "out_args": [ "/transformer_blocks.7/attn2/Reshape_1_output_0.out17_0_46_bfp.out21_15" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn2/to_q/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.7/norm1/Add_4_output_0.out0_0_36_bfp.out1_91" ], "const_args": [ "onnx::MatMul_12233" ], "out_args": [ "/transformer_blocks.7/attn2/Reshape_output_0.out17_0_47_bfp.out21_14" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn2/to_v/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.7/norm1/Add_4_output_0.out0_0_36_bfp.out1_91" ], "const_args": [ "onnx::MatMul_12235" ], "out_args": [ "/transformer_blocks.7/attn2/to_v/Add_output_0.out17_3_49_bfp.out25_47" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn2/MatMul_17_7mha_18_0_15", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.7/attn2/Reshape_output_0.out17_0_47_bfp.out21_14", "/transformer_blocks.7/attn2/Reshape_1_output_0.out17_0_46_bfp.out21_15", "/transformer_blocks.7/attn2/to_v/Add_output_0.out17_3_49_bfp.out25_47" ], "const_args": [], "out_args": [ "/transformer_blocks.7/attn2/Reshape_3_output_0.out20_7_bfp.out27_0_15" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "64", "state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn2/to_out.0/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.7/attn2/Reshape_3_output_0.out20_7_bfp.out27_0_15" ], "const_args": [ "onnx::MatMul_12243" ], "out_args": [ "/transformer_blocks.7/attn2/to_out.0/Add_output_0.out17_3_50_bfp.out25_48" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/norm1/norm/LayerNormalization_output_0.out14_29_bfp.out15_29" ], "const_args": [ "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_0_existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_1" ], "out_args": [ "/transformer_blocks.7/norm1/Add_2_output_0.out0_0_37_bfp.out1_90" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/norm1_context/norm/LayerNormalization_output_0.out14_28_bfp.out15_28" ], "const_args": [ "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_0_existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_1" ], "out_args": [ "/transformer_blocks.7/norm1_context/Add_2_output_0.out0_0_35_bfp.out1_93" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn/Concat_6_16_7", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.7/norm1_context/Add_2_output_0.out0_0_35_bfp.out1_93", "/transformer_blocks.7/norm1/Add_2_output_0.out0_0_37_bfp.out1_90" ], "const_args": [ "onnx::MatMul_12221_onnx::MatMul_12206" ], "out_args": [ "/transformer_blocks.7/attn/Concat_6_output_0_16_7.out18_1_7_bfp.out19_14" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn/Concat_7_16_7", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.7/norm1_context/Add_2_output_0.out0_0_35_bfp.out1_93", "/transformer_blocks.7/norm1/Add_2_output_0.out0_0_37_bfp.out1_90" ], "const_args": [ "onnx::MatMul_12222_onnx::MatMul_12207" ], "out_args": [ "/transformer_blocks.7/attn/Concat_7_16_7.out18_1_7_bfp.out19_15" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.7/norm1_context/Add_2_output_0.out0_0_35_bfp.out1_93", "/transformer_blocks.7/norm1/Add_2_output_0.out0_0_37_bfp.out1_90" ], "const_args": [ "onnx::MatMul_12223_onnx::MatMul_12208" ], "out_args": [ "/transformer_blocks.7/attn/Concat_8_3d.out18_1_7_bfp.out23_7" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn/MatMul_16_7mha_18_0_14", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.7/attn/Concat_6_output_0_16_7.out18_1_7_bfp.out19_14", "/transformer_blocks.7/attn/Concat_7_16_7.out18_1_7_bfp.out19_15", "/transformer_blocks.7/attn/Concat_8_3d.out18_1_7_bfp.out23_7" ], "const_args": [], "out_args": [ "/transformer_blocks.7/attn/Reshape_6_output_0.out18_1_7_bfp.out27_0_14" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.7/attn/Reshape_6_output_0.out18_1_7_bfp.out27_0_14" ], "const_args": [ "onnx::MatMul_12231" ], "out_args": [ "/transformer_blocks.7/attn/to_add_out/Add_output_0.out6_1_15_bfp.out7_15" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/Add_5", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/attn/to_add_out/Add_output_0.out6_1_15_bfp.out7_15", "/transformer_blocks.6/Add_8_output_0.out10_88" ], "const_args": [ "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_2_gma" ], "out_args": [ "/transformer_blocks.7/Add_5_output_0.out10_92" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.7/attn/Reshape_6_output_0.out18_1_7_bfp.out27_0_14" ], "const_args": [ "onnx::MatMul_12232" ], "out_args": [ "/transformer_blocks.7/attn/to_out.0/Add_output_0.out6_1_14_bfp.out7_14" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/attn/to_out.0/Add_output_0.out6_1_14_bfp.out7_14", "/Add_6_output_0.out_35_1_10" ], "const_args": [ "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_2_gma" ], "out_args": [ "/transformer_blocks.7/Add_output_0.out10_89" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/Add_1", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/attn2/to_out.0/Add_output_0.out17_3_50_bfp.out25_48", "/transformer_blocks.7/Add_output_0.out10_89" ], "const_args": [ "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_5_gma" ], "out_args": [ "/transformer_blocks.7/Add_1_output_0.out10_90" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.7/Add_1_output_0.out10_90" ], "const_args": [ "/transformer_blocks.7/norm2/Constant_output_0", "/transformer_blocks.7/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.7/norm2/LayerNormalization_output_0.out14_30_bfp.out15_30" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/Add_3", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/norm2/LayerNormalization_output_0.out14_30_bfp.out15_30" ], "const_args": [ "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_6_existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_7" ], "out_args": [ "/transformer_blocks.7/Add_3_output_0.out0_0_38_bfp.out1_92" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.7/Add_3_output_0.out0_0_38_bfp.out1_92" ], "const_args": [ "onnx::MatMul_12244" ], "out_args": [ "/transformer_blocks.7/ff/net.0/Mul_5_output_0.out17_2_14_bfp.out25_49" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.7/ff/net.0/Mul_5_output_0.out17_2_14_bfp.out25_49" ], "const_args": [ "onnx::MatMul_12245" ], "out_args": [ "/transformer_blocks.7/ff/net.2/Add_output_0.out17_3_51_bfp.out25_51" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/ff/net.2/Add_output_0.out17_3_51_bfp.out25_51", "/transformer_blocks.7/Add_1_output_0.out10_90" ], "const_args": [ "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_8_gma" ], "out_args": [ "/transformer_blocks.7/Add_4_output_0.out10_91" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_7", "type": "SDAdd", "in_args": [ "/transformer_blocks.7/Add_4_output_0.out10_91", "block_controlnet_hidden_states_3.out_35_1_10" ], "const_args": [], "out_args": [ "/Add_7_output_0.out_35_1_11" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_7_output_0.out_35_1_11" ], "const_args": [ "/transformer_blocks.8/norm1/norm/Constant_output_0", "/transformer_blocks.8/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.8/norm1/norm/LayerNormalization_output_0.out14_33_bfp.out15_33" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/norm1/Add_4", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/norm1/norm/LayerNormalization_output_0.out14_33_bfp.out15_33" ], "const_args": [ "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_3_existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_4" ], "out_args": [ "/transformer_blocks.8/norm1/Add_4_output_0.out0_0_41_bfp.out1_96" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.7/Add_5_output_0.out10_92" ], "const_args": [ "/transformer_blocks.7/norm2_context/Constant_output_0", "/transformer_blocks.7/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.7/norm2_context/LayerNormalization_output_0.out14_31_bfp.out15_31" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/Add_7", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/norm2_context/LayerNormalization_output_0.out14_31_bfp.out15_31" ], "const_args": [ "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_3_existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_4" ], "out_args": [ "/transformer_blocks.7/Add_7_output_0.out0_0_39_bfp.out1_94" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.7/Add_7_output_0.out0_0_39_bfp.out1_94" ], "const_args": [ "onnx::MatMul_12246" ], "out_args": [ "/transformer_blocks.7/ff_context/net.0/Mul_5_output_0.out17_2_15_bfp.out25_50" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.7/ff_context/net.0/Mul_5_output_0.out17_2_15_bfp.out25_50" ], "const_args": [ "onnx::MatMul_12247" ], "out_args": [ "/transformer_blocks.7/ff_context/net.2/Add_output_0.out17_3_52_bfp.out25_52" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/Add_8", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/ff_context/net.2/Add_output_0.out17_3_52_bfp.out25_52", "/transformer_blocks.7/Add_5_output_0.out10_92" ], "const_args": [ "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_5_gma" ], "out_args": [ "/transformer_blocks.7/Add_8_output_0.out10_93" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.7/Add_8_output_0.out10_93" ], "const_args": [ "/transformer_blocks.8/norm1_context/norm/Constant_output_0", "/transformer_blocks.8/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.8/norm1_context/norm/LayerNormalization_output_0.out14_32_bfp.out15_32" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn2/to_k/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.8/norm1/Add_4_output_0.out0_0_41_bfp.out1_96" ], "const_args": [ "onnx::MatMul_12276" ], "out_args": [ "/transformer_blocks.8/attn2/Reshape_1_output_0.out17_0_52_bfp.out21_17" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn2/to_q/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.8/norm1/Add_4_output_0.out0_0_41_bfp.out1_96" ], "const_args": [ "onnx::MatMul_12275" ], "out_args": [ "/transformer_blocks.8/attn2/Reshape_output_0.out17_0_53_bfp.out21_16" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn2/to_v/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.8/norm1/Add_4_output_0.out0_0_41_bfp.out1_96" ], "const_args": [ "onnx::MatMul_12277" ], "out_args": [ "/transformer_blocks.8/attn2/to_v/Add_output_0.out17_3_55_bfp.out25_53" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn2/MatMul_17_8mha_18_0_17", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.8/attn2/Reshape_output_0.out17_0_53_bfp.out21_16", "/transformer_blocks.8/attn2/Reshape_1_output_0.out17_0_52_bfp.out21_17", "/transformer_blocks.8/attn2/to_v/Add_output_0.out17_3_55_bfp.out25_53" ], "const_args": [], "out_args": [ "/transformer_blocks.8/attn2/Reshape_3_output_0.out20_8_bfp.out27_0_17" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "64", "state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn2/to_out.0/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.8/attn2/Reshape_3_output_0.out20_8_bfp.out27_0_17" ], "const_args": [ "onnx::MatMul_12285" ], "out_args": [ "/transformer_blocks.8/attn2/to_out.0/Add_output_0.out17_3_56_bfp.out25_54" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/norm1/norm/LayerNormalization_output_0.out14_33_bfp.out15_33" ], "const_args": [ "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_0_existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_1" ], "out_args": [ "/transformer_blocks.8/norm1/Add_2_output_0.out0_0_42_bfp.out1_95" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/norm1_context/norm/LayerNormalization_output_0.out14_32_bfp.out15_32" ], "const_args": [ "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_0_existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_1" ], "out_args": [ "/transformer_blocks.8/norm1_context/Add_2_output_0.out0_0_40_bfp.out1_98" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn/Concat_6_16_8", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.8/norm1_context/Add_2_output_0.out0_0_40_bfp.out1_98", "/transformer_blocks.8/norm1/Add_2_output_0.out0_0_42_bfp.out1_95" ], "const_args": [ "onnx::MatMul_12263_onnx::MatMul_12248" ], "out_args": [ "/transformer_blocks.8/attn/Concat_6_output_0_16_8.out18_1_8_bfp.out19_16" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn/Concat_7_16_8", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.8/norm1_context/Add_2_output_0.out0_0_40_bfp.out1_98", "/transformer_blocks.8/norm1/Add_2_output_0.out0_0_42_bfp.out1_95" ], "const_args": [ "onnx::MatMul_12264_onnx::MatMul_12249" ], "out_args": [ "/transformer_blocks.8/attn/Concat_7_16_8.out18_1_8_bfp.out19_17" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.8/norm1_context/Add_2_output_0.out0_0_40_bfp.out1_98", "/transformer_blocks.8/norm1/Add_2_output_0.out0_0_42_bfp.out1_95" ], "const_args": [ "onnx::MatMul_12265_onnx::MatMul_12250" ], "out_args": [ "/transformer_blocks.8/attn/Concat_8_3d.out18_1_8_bfp.out23_8" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn/MatMul_16_8mha_18_0_16", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.8/attn/Concat_6_output_0_16_8.out18_1_8_bfp.out19_16", "/transformer_blocks.8/attn/Concat_7_16_8.out18_1_8_bfp.out19_17", "/transformer_blocks.8/attn/Concat_8_3d.out18_1_8_bfp.out23_8" ], "const_args": [], "out_args": [ "/transformer_blocks.8/attn/Reshape_6_output_0.out18_1_8_bfp.out27_0_16" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.8/attn/Reshape_6_output_0.out18_1_8_bfp.out27_0_16" ], "const_args": [ "onnx::MatMul_12273" ], "out_args": [ "/transformer_blocks.8/attn/to_add_out/Add_output_0.out6_1_17_bfp.out7_17" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/Add_5", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/attn/to_add_out/Add_output_0.out6_1_17_bfp.out7_17", "/transformer_blocks.7/Add_8_output_0.out10_93" ], "const_args": [ "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_2_gma" ], "out_args": [ "/transformer_blocks.8/Add_5_output_0.out10_97" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.8/attn/Reshape_6_output_0.out18_1_8_bfp.out27_0_16" ], "const_args": [ "onnx::MatMul_12274" ], "out_args": [ "/transformer_blocks.8/attn/to_out.0/Add_output_0.out6_1_16_bfp.out7_16" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/attn/to_out.0/Add_output_0.out6_1_16_bfp.out7_16", "/Add_7_output_0.out_35_1_11" ], "const_args": [ "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_2_gma" ], "out_args": [ "/transformer_blocks.8/Add_output_0.out10_94" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/Add_1", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/attn2/to_out.0/Add_output_0.out17_3_56_bfp.out25_54", "/transformer_blocks.8/Add_output_0.out10_94" ], "const_args": [ "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_5_gma" ], "out_args": [ "/transformer_blocks.8/Add_1_output_0.out10_95" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.8/Add_1_output_0.out10_95" ], "const_args": [ "/transformer_blocks.8/norm2/Constant_output_0", "/transformer_blocks.8/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.8/norm2/LayerNormalization_output_0.out14_34_bfp.out15_34" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/Add_3", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/norm2/LayerNormalization_output_0.out14_34_bfp.out15_34" ], "const_args": [ "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_6_existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_7" ], "out_args": [ "/transformer_blocks.8/Add_3_output_0.out0_0_43_bfp.out1_97" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.8/Add_3_output_0.out0_0_43_bfp.out1_97" ], "const_args": [ "onnx::MatMul_12286" ], "out_args": [ "/transformer_blocks.8/ff/net.0/Mul_5_output_0.out17_2_16_bfp.out25_55" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.8/ff/net.0/Mul_5_output_0.out17_2_16_bfp.out25_55" ], "const_args": [ "onnx::MatMul_12287" ], "out_args": [ "/transformer_blocks.8/ff/net.2/Add_output_0.out17_3_57_bfp.out25_57" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.8/Add_5_output_0.out10_97" ], "const_args": [ "/transformer_blocks.8/norm2_context/Constant_output_0", "/transformer_blocks.8/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.8/norm2_context/LayerNormalization_output_0.out14_35_bfp.out15_35" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/Add_7", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/norm2_context/LayerNormalization_output_0.out14_35_bfp.out15_35" ], "const_args": [ "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_3_existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_4" ], "out_args": [ "/transformer_blocks.8/Add_7_output_0.out0_0_44_bfp.out1_99" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.8/Add_7_output_0.out0_0_44_bfp.out1_99" ], "const_args": [ "onnx::MatMul_12288" ], "out_args": [ "/transformer_blocks.8/ff_context/net.0/Mul_5_output_0.out17_2_17_bfp.out25_56" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.8/ff_context/net.0/Mul_5_output_0.out17_2_17_bfp.out25_56" ], "const_args": [ "onnx::MatMul_12289" ], "out_args": [ "/transformer_blocks.8/ff_context/net.2/Add_output_0.out17_3_58_bfp.out25_58" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/Add_8", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/ff_context/net.2/Add_output_0.out17_3_58_bfp.out25_58", "/transformer_blocks.8/Add_5_output_0.out10_97" ], "const_args": [ "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_5_gma" ], "out_args": [ "/transformer_blocks.8/Add_8_output_0.out10_98" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.8/Add_8_output_0.out10_98" ], "const_args": [ "/transformer_blocks.9/norm1_context/norm/Constant_output_0", "/transformer_blocks.9/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.9/norm1_context/norm/LayerNormalization_output_0.out14_36_bfp.out15_36" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/ff/net.2/Add_output_0.out17_3_57_bfp.out25_57", "/transformer_blocks.8/Add_1_output_0.out10_95" ], "const_args": [ "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_8_gma" ], "out_args": [ "/transformer_blocks.8/Add_4_output_0.out10_96" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_8", "type": "SDAdd", "in_args": [ "/transformer_blocks.8/Add_4_output_0.out10_96", "block_controlnet_hidden_states_4.out_35_1_12" ], "const_args": [], "out_args": [ "/Add_8_output_0.out_35_1_12" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_8_output_0.out_35_1_12" ], "const_args": [ "/transformer_blocks.9/norm1/norm/Constant_output_0", "/transformer_blocks.9/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.9/norm1/norm/LayerNormalization_output_0.out14_37_bfp.out15_37" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/norm1/Add_4", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/norm1/norm/LayerNormalization_output_0.out14_37_bfp.out15_37" ], "const_args": [ "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_3_existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_4" ], "out_args": [ "/transformer_blocks.9/norm1/Add_4_output_0.out0_0_46_bfp.out1_101" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn2/to_k/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.9/norm1/Add_4_output_0.out0_0_46_bfp.out1_101" ], "const_args": [ "onnx::MatMul_12318" ], "out_args": [ "/transformer_blocks.9/attn2/Reshape_1_output_0.out17_0_58_bfp.out21_19" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn2/to_q/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.9/norm1/Add_4_output_0.out0_0_46_bfp.out1_101" ], "const_args": [ "onnx::MatMul_12317" ], "out_args": [ "/transformer_blocks.9/attn2/Reshape_output_0.out17_0_59_bfp.out21_18" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn2/to_v/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.9/norm1/Add_4_output_0.out0_0_46_bfp.out1_101" ], "const_args": [ "onnx::MatMul_12319" ], "out_args": [ "/transformer_blocks.9/attn2/to_v/Add_output_0.out17_3_61_bfp.out25_59" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn2/MatMul_17_9mha_18_0_19", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.9/attn2/Reshape_output_0.out17_0_59_bfp.out21_18", "/transformer_blocks.9/attn2/Reshape_1_output_0.out17_0_58_bfp.out21_19", "/transformer_blocks.9/attn2/to_v/Add_output_0.out17_3_61_bfp.out25_59" ], "const_args": [], "out_args": [ "/transformer_blocks.9/attn2/Reshape_3_output_0.out20_9_bfp.out27_0_19" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "64", "state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn2/to_out.0/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.9/attn2/Reshape_3_output_0.out20_9_bfp.out27_0_19" ], "const_args": [ "onnx::MatMul_12327" ], "out_args": [ "/transformer_blocks.9/attn2/to_out.0/Add_output_0.out17_3_62_bfp.out25_60" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/norm1/norm/LayerNormalization_output_0.out14_37_bfp.out15_37" ], "const_args": [ "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_0_existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_1" ], "out_args": [ "/transformer_blocks.9/norm1/Add_2_output_0.out0_0_47_bfp.out1_100" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/norm1_context/norm/LayerNormalization_output_0.out14_36_bfp.out15_36" ], "const_args": [ "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_0_existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_1" ], "out_args": [ "/transformer_blocks.9/norm1_context/Add_2_output_0.out0_0_45_bfp.out1_103" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn/Concat_6_16_9", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.9/norm1_context/Add_2_output_0.out0_0_45_bfp.out1_103", "/transformer_blocks.9/norm1/Add_2_output_0.out0_0_47_bfp.out1_100" ], "const_args": [ "onnx::MatMul_12305_onnx::MatMul_12290" ], "out_args": [ "/transformer_blocks.9/attn/Concat_6_output_0_16_9.out18_1_9_bfp.out19_18" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn/Concat_7_16_9", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.9/norm1_context/Add_2_output_0.out0_0_45_bfp.out1_103", "/transformer_blocks.9/norm1/Add_2_output_0.out0_0_47_bfp.out1_100" ], "const_args": [ "onnx::MatMul_12306_onnx::MatMul_12291" ], "out_args": [ "/transformer_blocks.9/attn/Concat_7_16_9.out18_1_9_bfp.out19_19" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.9/norm1_context/Add_2_output_0.out0_0_45_bfp.out1_103", "/transformer_blocks.9/norm1/Add_2_output_0.out0_0_47_bfp.out1_100" ], "const_args": [ "onnx::MatMul_12307_onnx::MatMul_12292" ], "out_args": [ "/transformer_blocks.9/attn/Concat_8_3d.out18_1_9_bfp.out23_9" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn/MatMul_16_9mha_18_0_18", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.9/attn/Concat_6_output_0_16_9.out18_1_9_bfp.out19_18", "/transformer_blocks.9/attn/Concat_7_16_9.out18_1_9_bfp.out19_19", "/transformer_blocks.9/attn/Concat_8_3d.out18_1_9_bfp.out23_9" ], "const_args": [], "out_args": [ "/transformer_blocks.9/attn/Reshape_6_output_0.out18_1_9_bfp.out27_0_18" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.9/attn/Reshape_6_output_0.out18_1_9_bfp.out27_0_18" ], "const_args": [ "onnx::MatMul_12315" ], "out_args": [ "/transformer_blocks.9/attn/to_add_out/Add_output_0.out6_1_19_bfp.out7_19" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/Add_5", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/attn/to_add_out/Add_output_0.out6_1_19_bfp.out7_19", "/transformer_blocks.8/Add_8_output_0.out10_98" ], "const_args": [ "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_2_gma" ], "out_args": [ "/transformer_blocks.9/Add_5_output_0.out10_102" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.9/attn/Reshape_6_output_0.out18_1_9_bfp.out27_0_18" ], "const_args": [ "onnx::MatMul_12316" ], "out_args": [ "/transformer_blocks.9/attn/to_out.0/Add_output_0.out6_1_18_bfp.out7_18" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/attn/to_out.0/Add_output_0.out6_1_18_bfp.out7_18", "/Add_8_output_0.out_35_1_12" ], "const_args": [ "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_2_gma" ], "out_args": [ "/transformer_blocks.9/Add_output_0.out10_99" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/Add_1", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/attn2/to_out.0/Add_output_0.out17_3_62_bfp.out25_60", "/transformer_blocks.9/Add_output_0.out10_99" ], "const_args": [ "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_5_gma" ], "out_args": [ "/transformer_blocks.9/Add_1_output_0.out10_100" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.9/Add_1_output_0.out10_100" ], "const_args": [ "/transformer_blocks.9/norm2/Constant_output_0", "/transformer_blocks.9/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.9/norm2/LayerNormalization_output_0.out14_38_bfp.out15_38" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/Add_3", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/norm2/LayerNormalization_output_0.out14_38_bfp.out15_38" ], "const_args": [ "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_6_existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_7" ], "out_args": [ "/transformer_blocks.9/Add_3_output_0.out0_0_48_bfp.out1_102" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.9/Add_3_output_0.out0_0_48_bfp.out1_102" ], "const_args": [ "onnx::MatMul_12328" ], "out_args": [ "/transformer_blocks.9/ff/net.0/Mul_5_output_0.out17_2_18_bfp.out25_61" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.9/ff/net.0/Mul_5_output_0.out17_2_18_bfp.out25_61" ], "const_args": [ "onnx::MatMul_12329" ], "out_args": [ "/transformer_blocks.9/ff/net.2/Add_output_0.out17_3_63_bfp.out25_63" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/ff/net.2/Add_output_0.out17_3_63_bfp.out25_63", "/transformer_blocks.9/Add_1_output_0.out10_100" ], "const_args": [ "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_8_gma" ], "out_args": [ "/transformer_blocks.9/Add_4_output_0.out10_101" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_9", "type": "SDAdd", "in_args": [ "/transformer_blocks.9/Add_4_output_0.out10_101", "block_controlnet_hidden_states_4.out_35_1_12" ], "const_args": [], "out_args": [ "/Add_9_output_0.out_35_1_13" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_9_output_0.out_35_1_13" ], "const_args": [ "/transformer_blocks.10/norm1/norm/Constant_output_0", "/transformer_blocks.10/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.10/norm1/norm/LayerNormalization_output_0.out14_40_bfp.out15_40" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/norm1/norm/LayerNormalization_output_0.out14_40_bfp.out15_40" ], "const_args": [ "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_0_existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_1" ], "out_args": [ "/transformer_blocks.10/norm1/Add_2_output_0.out0_0_51_bfp.out1_7" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/norm1/Add_4", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/norm1/norm/LayerNormalization_output_0.out14_40_bfp.out15_40" ], "const_args": [ "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_3_existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_4" ], "out_args": [ "/transformer_blocks.10/norm1/Add_4_output_0.out0_0_50_bfp.out1_8" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.9/Add_5_output_0.out10_102" ], "const_args": [ "/transformer_blocks.9/norm2_context/Constant_output_0", "/transformer_blocks.9/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.9/norm2_context/LayerNormalization_output_0.out14_39_bfp.out15_39" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/Add_7", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/norm2_context/LayerNormalization_output_0.out14_39_bfp.out15_39" ], "const_args": [ "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_3_existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_4" ], "out_args": [ "/transformer_blocks.9/Add_7_output_0.out0_0_49_bfp.out1_104" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.9/Add_7_output_0.out0_0_49_bfp.out1_104" ], "const_args": [ "onnx::MatMul_12330" ], "out_args": [ "/transformer_blocks.9/ff_context/net.0/Mul_5_output_0.out17_2_19_bfp.out25_62" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.9/ff_context/net.0/Mul_5_output_0.out17_2_19_bfp.out25_62" ], "const_args": [ "onnx::MatMul_12331" ], "out_args": [ "/transformer_blocks.9/ff_context/net.2/Add_output_0.out17_3_64_bfp.out25_64" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/Add_8", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/ff_context/net.2/Add_output_0.out17_3_64_bfp.out25_64", "/transformer_blocks.9/Add_5_output_0.out10_102" ], "const_args": [ "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_5_gma" ], "out_args": [ "/transformer_blocks.9/Add_8_output_0.out10_103" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.9/Add_8_output_0.out10_103" ], "const_args": [ "/transformer_blocks.10/norm1_context/norm/Constant_output_0", "/transformer_blocks.10/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.10/norm1_context/norm/LayerNormalization_output_0.out14_41_bfp.out15_41" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/norm1_context/norm/LayerNormalization_output_0.out14_41_bfp.out15_41" ], "const_args": [ "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_0_existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_1" ], "out_args": [ "/transformer_blocks.10/norm1_context/Add_2_output_0.out0_0_52_bfp.out1_10" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn/Concat_6_16_10", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.10/norm1_context/Add_2_output_0.out0_0_52_bfp.out1_10", "/transformer_blocks.10/norm1/Add_2_output_0.out0_0_51_bfp.out1_7" ], "const_args": [ "onnx::MatMul_12347_onnx::MatMul_12332" ], "out_args": [ "/transformer_blocks.10/attn/Concat_6_output_0_16_10.out18_1_10_bfp.out19_20" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn/Concat_7_16_10", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.10/norm1_context/Add_2_output_0.out0_0_52_bfp.out1_10", "/transformer_blocks.10/norm1/Add_2_output_0.out0_0_51_bfp.out1_7" ], "const_args": [ "onnx::MatMul_12348_onnx::MatMul_12333" ], "out_args": [ "/transformer_blocks.10/attn/Concat_7_16_10.out18_1_10_bfp.out19_21" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.10/norm1_context/Add_2_output_0.out0_0_52_bfp.out1_10", "/transformer_blocks.10/norm1/Add_2_output_0.out0_0_51_bfp.out1_7" ], "const_args": [ "onnx::MatMul_12349_onnx::MatMul_12334" ], "out_args": [ "/transformer_blocks.10/attn/Concat_8_3d.out18_1_10_bfp.out23_10" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn/MatMul_16_10mha_18_0_20", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.10/attn/Concat_6_output_0_16_10.out18_1_10_bfp.out19_20", "/transformer_blocks.10/attn/Concat_7_16_10.out18_1_10_bfp.out19_21", "/transformer_blocks.10/attn/Concat_8_3d.out18_1_10_bfp.out23_10" ], "const_args": [], "out_args": [ "/transformer_blocks.10/attn/Reshape_6_output_0.out18_1_10_bfp.out27_0_20" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.10/attn/Reshape_6_output_0.out18_1_10_bfp.out27_0_20" ], "const_args": [ "onnx::MatMul_12358" ], "out_args": [ "/transformer_blocks.10/attn/to_out.0/Add_output_0.out6_1_20_bfp.out7_20" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/attn/to_out.0/Add_output_0.out6_1_20_bfp.out7_20", "/Add_9_output_0.out_35_1_13" ], "const_args": [ "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_2_gma" ], "out_args": [ "/transformer_blocks.10/Add_output_0.out10_7" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.10/attn/Reshape_6_output_0.out18_1_10_bfp.out27_0_20" ], "const_args": [ "onnx::MatMul_12357" ], "out_args": [ "/transformer_blocks.10/attn/to_add_out/Add_output_0.out6_1_21_bfp.out7_21" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn2/to_k/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.10/norm1/Add_4_output_0.out0_0_50_bfp.out1_8" ], "const_args": [ "onnx::MatMul_12360" ], "out_args": [ "/transformer_blocks.10/attn2/Reshape_1_output_0.out17_0_64_bfp.out21_21" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/Add_5", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/attn/to_add_out/Add_output_0.out6_1_21_bfp.out7_21", "/transformer_blocks.9/Add_8_output_0.out10_103" ], "const_args": [ "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_2_gma" ], "out_args": [ "/transformer_blocks.10/Add_5_output_0.out10_10" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.10/Add_5_output_0.out10_10" ], "const_args": [ "/transformer_blocks.10/norm2_context/Constant_output_0", "/transformer_blocks.10/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.10/norm2_context/LayerNormalization_output_0.out14_43_bfp.out15_43" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/Add_7", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/norm2_context/LayerNormalization_output_0.out14_43_bfp.out15_43" ], "const_args": [ "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_3_existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_4" ], "out_args": [ "/transformer_blocks.10/Add_7_output_0.out0_0_54_bfp.out1_11" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.10/Add_7_output_0.out0_0_54_bfp.out1_11" ], "const_args": [ "onnx::MatMul_12372" ], "out_args": [ "/transformer_blocks.10/ff_context/net.0/Mul_5_output_0.out17_2_21_bfp.out25_68" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.10/ff_context/net.0/Mul_5_output_0.out17_2_21_bfp.out25_68" ], "const_args": [ "onnx::MatMul_12373" ], "out_args": [ "/transformer_blocks.10/ff_context/net.2/Add_output_0.out17_3_70_bfp.out25_70" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/Add_8", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/ff_context/net.2/Add_output_0.out17_3_70_bfp.out25_70", "/transformer_blocks.10/Add_5_output_0.out10_10" ], "const_args": [ "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_5_gma" ], "out_args": [ "/transformer_blocks.10/Add_8_output_0.out10_11" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.10/Add_8_output_0.out10_11" ], "const_args": [ "/transformer_blocks.11/norm1_context/norm/Constant_output_0", "/transformer_blocks.11/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.11/norm1_context/norm/LayerNormalization_output_0.out14_44_bfp.out15_44" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn2/to_q/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.10/norm1/Add_4_output_0.out0_0_50_bfp.out1_8" ], "const_args": [ "onnx::MatMul_12359" ], "out_args": [ "/transformer_blocks.10/attn2/Reshape_output_0.out17_0_65_bfp.out21_20" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn2/to_v/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.10/norm1/Add_4_output_0.out0_0_50_bfp.out1_8" ], "const_args": [ "onnx::MatMul_12361" ], "out_args": [ "/transformer_blocks.10/attn2/to_v/Add_output_0.out17_3_67_bfp.out25_65" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn2/MatMul_17_10mha_18_0_21", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.10/attn2/Reshape_output_0.out17_0_65_bfp.out21_20", "/transformer_blocks.10/attn2/Reshape_1_output_0.out17_0_64_bfp.out21_21", "/transformer_blocks.10/attn2/to_v/Add_output_0.out17_3_67_bfp.out25_65" ], "const_args": [], "out_args": [ "/transformer_blocks.10/attn2/Reshape_3_output_0.out20_10_bfp.out27_0_21" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "64", "state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn2/to_out.0/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.10/attn2/Reshape_3_output_0.out20_10_bfp.out27_0_21" ], "const_args": [ "onnx::MatMul_12369" ], "out_args": [ "/transformer_blocks.10/attn2/to_out.0/Add_output_0.out17_3_68_bfp.out25_66" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/Add_1", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/attn2/to_out.0/Add_output_0.out17_3_68_bfp.out25_66", "/transformer_blocks.10/Add_output_0.out10_7" ], "const_args": [ "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_5_gma" ], "out_args": [ "/transformer_blocks.10/Add_1_output_0.out10_8" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.10/Add_1_output_0.out10_8" ], "const_args": [ "/transformer_blocks.10/norm2/Constant_output_0", "/transformer_blocks.10/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.10/norm2/LayerNormalization_output_0.out14_42_bfp.out15_42" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/Add_3", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/norm2/LayerNormalization_output_0.out14_42_bfp.out15_42" ], "const_args": [ "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_6_existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_7" ], "out_args": [ "/transformer_blocks.10/Add_3_output_0.out0_0_53_bfp.out1_9" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.10/Add_3_output_0.out0_0_53_bfp.out1_9" ], "const_args": [ "onnx::MatMul_12370" ], "out_args": [ "/transformer_blocks.10/ff/net.0/Mul_5_output_0.out17_2_20_bfp.out25_67" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.10/ff/net.0/Mul_5_output_0.out17_2_20_bfp.out25_67" ], "const_args": [ "onnx::MatMul_12371" ], "out_args": [ "/transformer_blocks.10/ff/net.2/Add_output_0.out17_3_69_bfp.out25_69" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/ff/net.2/Add_output_0.out17_3_69_bfp.out25_69", "/transformer_blocks.10/Add_1_output_0.out10_8" ], "const_args": [ "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_8_gma" ], "out_args": [ "/transformer_blocks.10/Add_4_output_0.out10_9" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_10", "type": "SDAdd", "in_args": [ "/transformer_blocks.10/Add_4_output_0.out10_9", "block_controlnet_hidden_states_5.out_35_1_14" ], "const_args": [], "out_args": [ "/Add_10_output_0.out_35_1_14" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_10_output_0.out_35_1_14" ], "const_args": [ "/transformer_blocks.11/norm1/norm/Constant_output_0", "/transformer_blocks.11/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.11/norm1/norm/LayerNormalization_output_0.out14_45_bfp.out15_45" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/norm1/Add_4", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/norm1/norm/LayerNormalization_output_0.out14_45_bfp.out15_45" ], "const_args": [ "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_3_existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_4" ], "out_args": [ "/transformer_blocks.11/norm1/Add_4_output_0.out0_0_56_bfp.out1_13" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/norm1/norm/LayerNormalization_output_0.out14_45_bfp.out15_45" ], "const_args": [ "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_0_existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_1" ], "out_args": [ "/transformer_blocks.11/norm1/Add_2_output_0.out0_0_57_bfp.out1_12" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/norm1_context/norm/LayerNormalization_output_0.out14_44_bfp.out15_44" ], "const_args": [ "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_0_existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_1" ], "out_args": [ "/transformer_blocks.11/norm1_context/Add_2_output_0.out0_0_55_bfp.out1_15" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/attn/Concat_6_16_11", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.11/norm1_context/Add_2_output_0.out0_0_55_bfp.out1_15", "/transformer_blocks.11/norm1/Add_2_output_0.out0_0_57_bfp.out1_12" ], "const_args": [ "onnx::MatMul_12389_onnx::MatMul_12374" ], "out_args": [ "/transformer_blocks.11/attn/Concat_6_output_0_16_11.out18_1_11_bfp.out19_22" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/attn/Concat_7_16_11", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.11/norm1_context/Add_2_output_0.out0_0_55_bfp.out1_15", "/transformer_blocks.11/norm1/Add_2_output_0.out0_0_57_bfp.out1_12" ], "const_args": [ "onnx::MatMul_12390_onnx::MatMul_12375" ], "out_args": [ "/transformer_blocks.11/attn/Concat_7_16_11.out18_1_11_bfp.out19_23" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.11/norm1_context/Add_2_output_0.out0_0_55_bfp.out1_15", "/transformer_blocks.11/norm1/Add_2_output_0.out0_0_57_bfp.out1_12" ], "const_args": [ "onnx::MatMul_12391_onnx::MatMul_12376" ], "out_args": [ "/transformer_blocks.11/attn/Concat_8_3d.out18_1_11_bfp.out23_11" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/attn/MatMul_16_11mha_18_0_22", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.11/attn/Concat_6_output_0_16_11.out18_1_11_bfp.out19_22", "/transformer_blocks.11/attn/Concat_7_16_11.out18_1_11_bfp.out19_23", "/transformer_blocks.11/attn/Concat_8_3d.out18_1_11_bfp.out23_11" ], "const_args": [], "out_args": [ "/transformer_blocks.11/attn/Reshape_6_output_0.out18_1_11_bfp.out27_0_22" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.11/attn/Reshape_6_output_0.out18_1_11_bfp.out27_0_22" ], "const_args": [ "onnx::MatMul_12400" ], "out_args": [ "/transformer_blocks.11/attn/to_out.0/Add_output_0.out6_1_22_bfp.out7_22" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/attn/to_out.0/Add_output_0.out6_1_22_bfp.out7_22", "/Add_10_output_0.out_35_1_14" ], "const_args": [ "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_2_gma" ], "out_args": [ "/transformer_blocks.11/Add_output_0.out10_12" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.11/attn/Reshape_6_output_0.out18_1_11_bfp.out27_0_22" ], "const_args": [ "onnx::MatMul_12399" ], "out_args": [ "/transformer_blocks.11/attn/to_add_out/Add_output_0.out6_1_23_bfp.out7_23" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/attn2/to_k/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.11/norm1/Add_4_output_0.out0_0_56_bfp.out1_13" ], "const_args": [ "onnx::MatMul_12402" ], "out_args": [ "/transformer_blocks.11/attn2/Reshape_1_output_0.out17_0_70_bfp.out21_23" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/Add_5", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/attn/to_add_out/Add_output_0.out6_1_23_bfp.out7_23", "/transformer_blocks.10/Add_8_output_0.out10_11" ], "const_args": [ "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_2_gma" ], "out_args": [ "/transformer_blocks.11/Add_5_output_0.out10_15" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.11/Add_5_output_0.out10_15" ], "const_args": [ "/transformer_blocks.11/norm2_context/Constant_output_0", "/transformer_blocks.11/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.11/norm2_context/LayerNormalization_output_0.out14_47_bfp.out15_47" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/Add_7", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/norm2_context/LayerNormalization_output_0.out14_47_bfp.out15_47" ], "const_args": [ "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_3_existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_4" ], "out_args": [ "/transformer_blocks.11/Add_7_output_0.out0_0_59_bfp.out1_16" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.11/Add_7_output_0.out0_0_59_bfp.out1_16" ], "const_args": [ "onnx::MatMul_12414" ], "out_args": [ "/transformer_blocks.11/ff_context/net.0/Mul_5_output_0.out17_2_23_bfp.out25_74" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.11/ff_context/net.0/Mul_5_output_0.out17_2_23_bfp.out25_74" ], "const_args": [ "onnx::MatMul_12415" ], "out_args": [ "/transformer_blocks.11/ff_context/net.2/Add_output_0.out17_3_76_bfp.out25_76" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/Add_8", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/ff_context/net.2/Add_output_0.out17_3_76_bfp.out25_76", "/transformer_blocks.11/Add_5_output_0.out10_15" ], "const_args": [ "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_5_gma" ], "out_args": [ "/transformer_blocks.11/Add_8_output_0.out10_16" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.11/Add_8_output_0.out10_16" ], "const_args": [ "/transformer_blocks.12/norm1_context/norm/Constant_output_0", "/transformer_blocks.12/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.12/norm1_context/norm/LayerNormalization_output_0.out14_48_bfp.out15_48" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/attn2/to_q/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.11/norm1/Add_4_output_0.out0_0_56_bfp.out1_13" ], "const_args": [ "onnx::MatMul_12401" ], "out_args": [ "/transformer_blocks.11/attn2/Reshape_output_0.out17_0_71_bfp.out21_22" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/attn2/to_v/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.11/norm1/Add_4_output_0.out0_0_56_bfp.out1_13" ], "const_args": [ "onnx::MatMul_12403" ], "out_args": [ "/transformer_blocks.11/attn2/to_v/Add_output_0.out17_3_73_bfp.out25_71" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/attn2/MatMul_17_11mha_18_0_23", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.11/attn2/Reshape_output_0.out17_0_71_bfp.out21_22", "/transformer_blocks.11/attn2/Reshape_1_output_0.out17_0_70_bfp.out21_23", "/transformer_blocks.11/attn2/to_v/Add_output_0.out17_3_73_bfp.out25_71" ], "const_args": [], "out_args": [ "/transformer_blocks.11/attn2/Reshape_3_output_0.out20_11_bfp.out27_0_23" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "64", "state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/attn2/to_out.0/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.11/attn2/Reshape_3_output_0.out20_11_bfp.out27_0_23" ], "const_args": [ "onnx::MatMul_12411" ], "out_args": [ "/transformer_blocks.11/attn2/to_out.0/Add_output_0.out17_3_74_bfp.out25_72" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/Add_1", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/attn2/to_out.0/Add_output_0.out17_3_74_bfp.out25_72", "/transformer_blocks.11/Add_output_0.out10_12" ], "const_args": [ "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_5_gma" ], "out_args": [ "/transformer_blocks.11/Add_1_output_0.out10_13" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.11/Add_1_output_0.out10_13" ], "const_args": [ "/transformer_blocks.11/norm2/Constant_output_0", "/transformer_blocks.11/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.11/norm2/LayerNormalization_output_0.out14_46_bfp.out15_46" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/Add_3", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/norm2/LayerNormalization_output_0.out14_46_bfp.out15_46" ], "const_args": [ "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_6_existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_7" ], "out_args": [ "/transformer_blocks.11/Add_3_output_0.out0_0_58_bfp.out1_14" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.11/Add_3_output_0.out0_0_58_bfp.out1_14" ], "const_args": [ "onnx::MatMul_12412" ], "out_args": [ "/transformer_blocks.11/ff/net.0/Mul_5_output_0.out17_2_22_bfp.out25_73" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.11/ff/net.0/Mul_5_output_0.out17_2_22_bfp.out25_73" ], "const_args": [ "onnx::MatMul_12413" ], "out_args": [ "/transformer_blocks.11/ff/net.2/Add_output_0.out17_3_75_bfp.out25_75" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/ff/net.2/Add_output_0.out17_3_75_bfp.out25_75", "/transformer_blocks.11/Add_1_output_0.out10_13" ], "const_args": [ "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_8_gma" ], "out_args": [ "/transformer_blocks.11/Add_4_output_0.out10_14" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_11", "type": "SDAdd", "in_args": [ "/transformer_blocks.11/Add_4_output_0.out10_14", "block_controlnet_hidden_states_5.out_35_1_14" ], "const_args": [], "out_args": [ "/Add_11_output_0.out_35_1_15" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_11_output_0.out_35_1_15" ], "const_args": [ "/transformer_blocks.12/norm1/norm/Constant_output_0", "/transformer_blocks.12/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.12/norm1/norm/LayerNormalization_output_0.out14_49_bfp.out15_49" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/norm1/Add_4", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.12/norm1/norm/LayerNormalization_output_0.out14_49_bfp.out15_49" ], "const_args": [ "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_3_existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_4" ], "out_args": [ "/transformer_blocks.12/norm1/Add_4_output_0.out0_0_61_bfp.out1_18" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/attn2/to_k/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.12/norm1/Add_4_output_0.out0_0_61_bfp.out1_18" ], "const_args": [ "onnx::MatMul_12444" ], "out_args": [ "/transformer_blocks.12/attn2/Reshape_1_output_0.out17_0_76_bfp.out21_25" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.12/norm1/norm/LayerNormalization_output_0.out14_49_bfp.out15_49" ], "const_args": [ "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_0_existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_1" ], "out_args": [ "/transformer_blocks.12/norm1/Add_2_output_0.out0_0_62_bfp.out1_17" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.12/norm1_context/norm/LayerNormalization_output_0.out14_48_bfp.out15_48" ], "const_args": [ "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_0_existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_1" ], "out_args": [ "/transformer_blocks.12/norm1_context/Add_2_output_0.out0_0_60_bfp.out1_20" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/attn/Concat_6_16_12", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.12/norm1_context/Add_2_output_0.out0_0_60_bfp.out1_20", "/transformer_blocks.12/norm1/Add_2_output_0.out0_0_62_bfp.out1_17" ], "const_args": [ "onnx::MatMul_12431_onnx::MatMul_12416" ], "out_args": [ "/transformer_blocks.12/attn/Concat_6_output_0_16_12.out18_1_12_bfp.out19_24" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/attn/Concat_7_16_12", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.12/norm1_context/Add_2_output_0.out0_0_60_bfp.out1_20", "/transformer_blocks.12/norm1/Add_2_output_0.out0_0_62_bfp.out1_17" ], "const_args": [ "onnx::MatMul_12432_onnx::MatMul_12417" ], "out_args": [ "/transformer_blocks.12/attn/Concat_7_16_12.out18_1_12_bfp.out19_25" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.12/norm1_context/Add_2_output_0.out0_0_60_bfp.out1_20", "/transformer_blocks.12/norm1/Add_2_output_0.out0_0_62_bfp.out1_17" ], "const_args": [ "onnx::MatMul_12433_onnx::MatMul_12418" ], "out_args": [ "/transformer_blocks.12/attn/Concat_8_3d.out18_1_12_bfp.out23_12" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/attn/MatMul_16_12mha_18_0_24", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.12/attn/Concat_6_output_0_16_12.out18_1_12_bfp.out19_24", "/transformer_blocks.12/attn/Concat_7_16_12.out18_1_12_bfp.out19_25", "/transformer_blocks.12/attn/Concat_8_3d.out18_1_12_bfp.out23_12" ], "const_args": [], "out_args": [ "/transformer_blocks.12/attn/Reshape_6_output_0.out18_1_12_bfp.out27_0_24" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.12/attn/Reshape_6_output_0.out18_1_12_bfp.out27_0_24" ], "const_args": [ "onnx::MatMul_12442" ], "out_args": [ "/transformer_blocks.12/attn/to_out.0/Add_output_0.out6_1_24_bfp.out7_24" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.12/attn/to_out.0/Add_output_0.out6_1_24_bfp.out7_24", "/Add_11_output_0.out_35_1_15" ], "const_args": [ "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_2_gma" ], "out_args": [ "/transformer_blocks.12/Add_output_0.out10_17" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.12/attn/Reshape_6_output_0.out18_1_12_bfp.out27_0_24" ], "const_args": [ "onnx::MatMul_12441" ], "out_args": [ "/transformer_blocks.12/attn/to_add_out/Add_output_0.out6_1_25_bfp.out7_25" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/Add_5", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.12/attn/to_add_out/Add_output_0.out6_1_25_bfp.out7_25", "/transformer_blocks.11/Add_8_output_0.out10_16" ], "const_args": [ "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_2_gma" ], "out_args": [ "/transformer_blocks.12/Add_5_output_0.out10_20" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.12/Add_5_output_0.out10_20" ], "const_args": [ "/transformer_blocks.12/norm2_context/Constant_output_0", "/transformer_blocks.12/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.12/norm2_context/LayerNormalization_output_0.out14_51_bfp.out15_51" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/Add_7", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.12/norm2_context/LayerNormalization_output_0.out14_51_bfp.out15_51" ], "const_args": [ "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_3_existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_4" ], "out_args": [ "/transformer_blocks.12/Add_7_output_0.out0_0_64_bfp.out1_21" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.12/Add_7_output_0.out0_0_64_bfp.out1_21" ], "const_args": [ "onnx::MatMul_12456" ], "out_args": [ "/transformer_blocks.12/ff_context/net.0/Mul_5_output_0.out17_2_25_bfp.out25_80" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.12/ff_context/net.0/Mul_5_output_0.out17_2_25_bfp.out25_80" ], "const_args": [ "onnx::MatMul_12457" ], "out_args": [ "/transformer_blocks.12/ff_context/net.2/Add_output_0.out17_3_82_bfp.out25_82" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/Add_8", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.12/ff_context/net.2/Add_output_0.out17_3_82_bfp.out25_82", "/transformer_blocks.12/Add_5_output_0.out10_20" ], "const_args": [ "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_5_gma" ], "out_args": [ "/transformer_blocks.12/Add_8_output_0.out10_21" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.12/Add_8_output_0.out10_21" ], "const_args": [ "/transformer_blocks.13/norm1_context/norm/Constant_output_0", "/transformer_blocks.13/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.13/norm1_context/norm/LayerNormalization_output_0.out14_52_bfp.out15_52" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/attn2/to_q/MatMul", "type": "SDGemmRN_bfp", "in_args": [ "/transformer_blocks.12/norm1/Add_4_output_0.out0_0_61_bfp.out1_18" ], "const_args": [ "onnx::MatMul_12443" ], "out_args": [ "/transformer_blocks.12/attn2/Reshape_output_0.out17_0_77_bfp.out21_24" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "scale_shape": { "type": "int", "value": [ "64" ] }, "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/attn2/to_v/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.12/norm1/Add_4_output_0.out0_0_61_bfp.out1_18" ], "const_args": [ "onnx::MatMul_12445" ], "out_args": [ "/transformer_blocks.12/attn2/to_v/Add_output_0.out17_3_79_bfp.out25_77" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/attn2/MatMul_17_12mha_18_0_25", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.12/attn2/Reshape_output_0.out17_0_77_bfp.out21_24", "/transformer_blocks.12/attn2/Reshape_1_output_0.out17_0_76_bfp.out21_25", "/transformer_blocks.12/attn2/to_v/Add_output_0.out17_3_79_bfp.out25_77" ], "const_args": [], "out_args": [ "/transformer_blocks.12/attn2/Reshape_3_output_0.out20_12_bfp.out27_0_25" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "64", "state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/attn2/to_out.0/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.12/attn2/Reshape_3_output_0.out20_12_bfp.out27_0_25" ], "const_args": [ "onnx::MatMul_12453" ], "out_args": [ "/transformer_blocks.12/attn2/to_out.0/Add_output_0.out17_3_80_bfp.out25_78" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/Add_1", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.12/attn2/to_out.0/Add_output_0.out17_3_80_bfp.out25_78", "/transformer_blocks.12/Add_output_0.out10_17" ], "const_args": [ "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_5_gma" ], "out_args": [ "/transformer_blocks.12/Add_1_output_0.out10_18" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.12/Add_1_output_0.out10_18" ], "const_args": [ "/transformer_blocks.12/norm2/Constant_output_0", "/transformer_blocks.12/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.12/norm2/LayerNormalization_output_0.out14_50_bfp.out15_50" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/Add_3", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.12/norm2/LayerNormalization_output_0.out14_50_bfp.out15_50" ], "const_args": [ "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_6_existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_7" ], "out_args": [ "/transformer_blocks.12/Add_3_output_0.out0_0_63_bfp.out1_19" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.12/Add_3_output_0.out0_0_63_bfp.out1_19" ], "const_args": [ "onnx::MatMul_12454" ], "out_args": [ "/transformer_blocks.12/ff/net.0/Mul_5_output_0.out17_2_24_bfp.out25_79" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.12/ff/net.0/Mul_5_output_0.out17_2_24_bfp.out25_79" ], "const_args": [ "onnx::MatMul_12455" ], "out_args": [ "/transformer_blocks.12/ff/net.2/Add_output_0.out17_3_81_bfp.out25_81" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.12/ff/net.2/Add_output_0.out17_3_81_bfp.out25_81", "/transformer_blocks.12/Add_1_output_0.out10_18" ], "const_args": [ "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_8_gma" ], "out_args": [ "/transformer_blocks.12/Add_4_output_0.out10_19" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_12", "type": "SDAdd", "in_args": [ "/transformer_blocks.12/Add_4_output_0.out10_19", "block_controlnet_hidden_states_6.out_35_1_16" ], "const_args": [], "out_args": [ "/Add_12_output_0.out_35_1_16" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_12_output_0.out_35_1_16" ], "const_args": [ "/transformer_blocks.13/norm1/norm/Constant_output_0", "/transformer_blocks.13/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.13/norm1/norm/LayerNormalization_output_0.out14_53_bfp.out15_53" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.13/norm1/norm/LayerNormalization_output_0.out14_53_bfp.out15_53" ], "const_args": [ "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_0_existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_1" ], "out_args": [ "/transformer_blocks.13/norm1/Add_2_output_0.out0_0_66_bfp.out1_22" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.13/norm1_context/norm/LayerNormalization_output_0.out14_52_bfp.out15_52" ], "const_args": [ "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_0_existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_1" ], "out_args": [ "/transformer_blocks.13/norm1_context/Add_2_output_0.out0_0_65_bfp.out1_24" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/attn/Concat_6_16_13", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.13/norm1_context/Add_2_output_0.out0_0_65_bfp.out1_24", "/transformer_blocks.13/norm1/Add_2_output_0.out0_0_66_bfp.out1_22" ], "const_args": [ "onnx::MatMul_12473_onnx::MatMul_12458" ], "out_args": [ "/transformer_blocks.13/attn/Concat_6_output_0_16_13.out18_1_13_bfp.out19_26" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/attn/Concat_7_16_13", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.13/norm1_context/Add_2_output_0.out0_0_65_bfp.out1_24", "/transformer_blocks.13/norm1/Add_2_output_0.out0_0_66_bfp.out1_22" ], "const_args": [ "onnx::MatMul_12474_onnx::MatMul_12459" ], "out_args": [ "/transformer_blocks.13/attn/Concat_7_16_13.out18_1_13_bfp.out19_27" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.13/norm1_context/Add_2_output_0.out0_0_65_bfp.out1_24", "/transformer_blocks.13/norm1/Add_2_output_0.out0_0_66_bfp.out1_22" ], "const_args": [ "onnx::MatMul_12475_onnx::MatMul_12460" ], "out_args": [ "/transformer_blocks.13/attn/Concat_8_3d.out18_1_13_bfp.out23_13" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/attn/MatMul_16_13mha_18_0_26", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.13/attn/Concat_6_output_0_16_13.out18_1_13_bfp.out19_26", "/transformer_blocks.13/attn/Concat_7_16_13.out18_1_13_bfp.out19_27", "/transformer_blocks.13/attn/Concat_8_3d.out18_1_13_bfp.out23_13" ], "const_args": [], "out_args": [ "/transformer_blocks.13/attn/Reshape_6_output_0.out18_1_13_bfp.out27_0_26" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.13/attn/Reshape_6_output_0.out18_1_13_bfp.out27_0_26" ], "const_args": [ "onnx::MatMul_12484" ], "out_args": [ "/transformer_blocks.13/attn/to_out.0/Add_output_0.out6_1_26_bfp.out7_26" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.13/attn/to_out.0/Add_output_0.out6_1_26_bfp.out7_26", "/Add_12_output_0.out_35_1_16" ], "const_args": [ "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_2_gma" ], "out_args": [ "/transformer_blocks.13/Add_output_0.out10_22" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.13/Add_output_0.out10_22" ], "const_args": [ "/transformer_blocks.13/norm2/Constant_output_0", "/transformer_blocks.13/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.13/norm2/LayerNormalization_output_0.out14_54_bfp.out15_54" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.13/norm2/LayerNormalization_output_0.out14_54_bfp.out15_54" ], "const_args": [ "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_3_existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_4" ], "out_args": [ "/transformer_blocks.13/Add_2_output_0.out0_0_67_bfp.out1_23" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.13/Add_2_output_0.out0_0_67_bfp.out1_23" ], "const_args": [ "onnx::MatMul_12485" ], "out_args": [ "/transformer_blocks.13/ff/net.0/Mul_5_output_0.out17_2_26_bfp.out25_83" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.13/ff/net.0/Mul_5_output_0.out17_2_26_bfp.out25_83" ], "const_args": [ "onnx::MatMul_12486" ], "out_args": [ "/transformer_blocks.13/ff/net.2/Add_output_0.out17_3_85_bfp.out25_85" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.13/ff/net.2/Add_output_0.out17_3_85_bfp.out25_85", "/transformer_blocks.13/Add_output_0.out10_22" ], "const_args": [ "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_5_gma" ], "out_args": [ "/transformer_blocks.13/Add_3_output_0.out10_23" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_13", "type": "SDAdd", "in_args": [ "/transformer_blocks.13/Add_3_output_0.out10_23", "block_controlnet_hidden_states_6.out_35_1_16" ], "const_args": [], "out_args": [ "/Add_13_output_0.out_35_1_17" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.13/attn/Reshape_6_output_0.out18_1_13_bfp.out27_0_26" ], "const_args": [ "onnx::MatMul_12483" ], "out_args": [ "/transformer_blocks.13/attn/to_add_out/Add_output_0.out6_1_27_bfp.out7_27" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.13/attn/to_add_out/Add_output_0.out6_1_27_bfp.out7_27", "/transformer_blocks.12/Add_8_output_0.out10_21" ], "const_args": [ "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_2_gma" ], "out_args": [ "/transformer_blocks.13/Add_4_output_0.out10_24" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.13/Add_4_output_0.out10_24" ], "const_args": [ "/transformer_blocks.13/norm2_context/Constant_output_0", "/transformer_blocks.13/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.13/norm2_context/LayerNormalization_output_0.out14_55_bfp.out15_55" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.13/norm2_context/LayerNormalization_output_0.out14_55_bfp.out15_55" ], "const_args": [ "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_3_existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_4" ], "out_args": [ "/transformer_blocks.13/Add_6_output_0.out0_0_68_bfp.out1_25" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.13/Add_6_output_0.out0_0_68_bfp.out1_25" ], "const_args": [ "onnx::MatMul_12487" ], "out_args": [ "/transformer_blocks.13/ff_context/net.0/Mul_5_output_0.out17_2_27_bfp.out25_84" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.13/ff_context/net.0/Mul_5_output_0.out17_2_27_bfp.out25_84" ], "const_args": [ "onnx::MatMul_12488" ], "out_args": [ "/transformer_blocks.13/ff_context/net.2/Add_output_0.out17_3_86_bfp.out25_86" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.13/ff_context/net.2/Add_output_0.out17_3_86_bfp.out25_86", "/transformer_blocks.13/Add_4_output_0.out10_24" ], "const_args": [ "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_5_gma" ], "out_args": [ "/transformer_blocks.13/Add_7_output_0.out10_25" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_13_output_0.out_35_1_17" ], "const_args": [ "/transformer_blocks.14/norm1/norm/Constant_output_0", "/transformer_blocks.14/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.14/norm1/norm/LayerNormalization_output_0.out14_57_bfp.out15_57" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.14/norm1/norm/LayerNormalization_output_0.out14_57_bfp.out15_57" ], "const_args": [ "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_0_existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_1" ], "out_args": [ "/transformer_blocks.14/norm1/Add_2_output_0.out0_0_70_bfp.out1_26" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.13/Add_7_output_0.out10_25" ], "const_args": [ "/transformer_blocks.14/norm1_context/norm/Constant_output_0", "/transformer_blocks.14/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.14/norm1_context/norm/LayerNormalization_output_0.out14_56_bfp.out15_56" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.14/norm1_context/norm/LayerNormalization_output_0.out14_56_bfp.out15_56" ], "const_args": [ "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_0_existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_1" ], "out_args": [ "/transformer_blocks.14/norm1_context/Add_2_output_0.out0_0_69_bfp.out1_28" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/attn/Concat_6_16_14", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.14/norm1_context/Add_2_output_0.out0_0_69_bfp.out1_28", "/transformer_blocks.14/norm1/Add_2_output_0.out0_0_70_bfp.out1_26" ], "const_args": [ "onnx::MatMul_12504_onnx::MatMul_12489" ], "out_args": [ "/transformer_blocks.14/attn/Concat_6_output_0_16_14.out18_1_14_bfp.out19_28" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/attn/Concat_7_16_14", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.14/norm1_context/Add_2_output_0.out0_0_69_bfp.out1_28", "/transformer_blocks.14/norm1/Add_2_output_0.out0_0_70_bfp.out1_26" ], "const_args": [ "onnx::MatMul_12505_onnx::MatMul_12490" ], "out_args": [ "/transformer_blocks.14/attn/Concat_7_16_14.out18_1_14_bfp.out19_29" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.14/norm1_context/Add_2_output_0.out0_0_69_bfp.out1_28", "/transformer_blocks.14/norm1/Add_2_output_0.out0_0_70_bfp.out1_26" ], "const_args": [ "onnx::MatMul_12506_onnx::MatMul_12491" ], "out_args": [ "/transformer_blocks.14/attn/Concat_8_3d.out18_1_14_bfp.out23_14" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/attn/MatMul_16_14mha_18_0_27", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.14/attn/Concat_6_output_0_16_14.out18_1_14_bfp.out19_28", "/transformer_blocks.14/attn/Concat_7_16_14.out18_1_14_bfp.out19_29", "/transformer_blocks.14/attn/Concat_8_3d.out18_1_14_bfp.out23_14" ], "const_args": [], "out_args": [ "/transformer_blocks.14/attn/Reshape_6_output_0.out18_1_14_bfp.out27_0_27" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.14/attn/Reshape_6_output_0.out18_1_14_bfp.out27_0_27" ], "const_args": [ "onnx::MatMul_12515" ], "out_args": [ "/transformer_blocks.14/attn/to_out.0/Add_output_0.out6_1_28_bfp.out7_28" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.14/attn/to_out.0/Add_output_0.out6_1_28_bfp.out7_28", "/Add_13_output_0.out_35_1_17" ], "const_args": [ "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_2_gma" ], "out_args": [ "/transformer_blocks.14/Add_output_0.out10_26" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.14/Add_output_0.out10_26" ], "const_args": [ "/transformer_blocks.14/norm2/Constant_output_0", "/transformer_blocks.14/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.14/norm2/LayerNormalization_output_0.out14_58_bfp.out15_58" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.14/norm2/LayerNormalization_output_0.out14_58_bfp.out15_58" ], "const_args": [ "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_3_existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_4" ], "out_args": [ "/transformer_blocks.14/Add_2_output_0.out0_0_71_bfp.out1_27" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.14/Add_2_output_0.out0_0_71_bfp.out1_27" ], "const_args": [ "onnx::MatMul_12516" ], "out_args": [ "/transformer_blocks.14/ff/net.0/Mul_5_output_0.out17_2_28_bfp.out25_87" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.14/ff/net.0/Mul_5_output_0.out17_2_28_bfp.out25_87" ], "const_args": [ "onnx::MatMul_12517" ], "out_args": [ "/transformer_blocks.14/ff/net.2/Add_output_0.out17_3_89_bfp.out25_89" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.14/ff/net.2/Add_output_0.out17_3_89_bfp.out25_89", "/transformer_blocks.14/Add_output_0.out10_26" ], "const_args": [ "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_5_gma" ], "out_args": [ "/transformer_blocks.14/Add_3_output_0.out10_27" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.14/attn/Reshape_6_output_0.out18_1_14_bfp.out27_0_27" ], "const_args": [ "onnx::MatMul_12514" ], "out_args": [ "/transformer_blocks.14/attn/to_add_out/Add_output_0.out6_1_29_bfp.out7_29" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.14/attn/to_add_out/Add_output_0.out6_1_29_bfp.out7_29", "/transformer_blocks.13/Add_7_output_0.out10_25" ], "const_args": [ "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_2_gma" ], "out_args": [ "/transformer_blocks.14/Add_4_output_0.out10_28" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.14/Add_4_output_0.out10_28" ], "const_args": [ "/transformer_blocks.14/norm2_context/Constant_output_0", "/transformer_blocks.14/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.14/norm2_context/LayerNormalization_output_0.out14_59_bfp.out15_59" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.14/norm2_context/LayerNormalization_output_0.out14_59_bfp.out15_59" ], "const_args": [ "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_3_existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_4" ], "out_args": [ "/transformer_blocks.14/Add_6_output_0.out0_0_72_bfp.out1_29" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.14/Add_6_output_0.out0_0_72_bfp.out1_29" ], "const_args": [ "onnx::MatMul_12518" ], "out_args": [ "/transformer_blocks.14/ff_context/net.0/Mul_5_output_0.out17_2_29_bfp.out25_88" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.14/ff_context/net.0/Mul_5_output_0.out17_2_29_bfp.out25_88" ], "const_args": [ "onnx::MatMul_12519" ], "out_args": [ "/transformer_blocks.14/ff_context/net.2/Add_output_0.out17_3_90_bfp.out25_90" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.14/ff_context/net.2/Add_output_0.out17_3_90_bfp.out25_90", "/transformer_blocks.14/Add_4_output_0.out10_28" ], "const_args": [ "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_5_gma" ], "out_args": [ "/transformer_blocks.14/Add_7_output_0.out10_29" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.14/Add_7_output_0.out10_29" ], "const_args": [ "/transformer_blocks.15/norm1_context/norm/Constant_output_0", "/transformer_blocks.15/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.15/norm1_context/norm/LayerNormalization_output_0.out14_60_bfp.out15_60" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_14", "type": "SDAdd", "in_args": [ "/transformer_blocks.14/Add_3_output_0.out10_27", "block_controlnet_hidden_states_7.out_35_1_18" ], "const_args": [], "out_args": [ "/Add_14_output_0.out_35_1_18" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_14_output_0.out_35_1_18" ], "const_args": [ "/transformer_blocks.15/norm1/norm/Constant_output_0", "/transformer_blocks.15/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.15/norm1/norm/LayerNormalization_output_0.out14_61_bfp.out15_61" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.15/norm1/norm/LayerNormalization_output_0.out14_61_bfp.out15_61" ], "const_args": [ "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_0_existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_1" ], "out_args": [ "/transformer_blocks.15/norm1/Add_2_output_0.out0_0_74_bfp.out1_30" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.15/norm1_context/norm/LayerNormalization_output_0.out14_60_bfp.out15_60" ], "const_args": [ "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_0_existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_1" ], "out_args": [ "/transformer_blocks.15/norm1_context/Add_2_output_0.out0_0_73_bfp.out1_32" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/attn/Concat_6_16_15", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.15/norm1_context/Add_2_output_0.out0_0_73_bfp.out1_32", "/transformer_blocks.15/norm1/Add_2_output_0.out0_0_74_bfp.out1_30" ], "const_args": [ "onnx::MatMul_12535_onnx::MatMul_12520" ], "out_args": [ "/transformer_blocks.15/attn/Concat_6_output_0_16_15.out18_1_15_bfp.out19_30" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/attn/Concat_7_16_15", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.15/norm1_context/Add_2_output_0.out0_0_73_bfp.out1_32", "/transformer_blocks.15/norm1/Add_2_output_0.out0_0_74_bfp.out1_30" ], "const_args": [ "onnx::MatMul_12536_onnx::MatMul_12521" ], "out_args": [ "/transformer_blocks.15/attn/Concat_7_16_15.out18_1_15_bfp.out19_31" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.15/norm1_context/Add_2_output_0.out0_0_73_bfp.out1_32", "/transformer_blocks.15/norm1/Add_2_output_0.out0_0_74_bfp.out1_30" ], "const_args": [ "onnx::MatMul_12537_onnx::MatMul_12522" ], "out_args": [ "/transformer_blocks.15/attn/Concat_8_3d.out18_1_15_bfp.out23_15" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/attn/MatMul_16_15mha_18_0_28", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.15/attn/Concat_6_output_0_16_15.out18_1_15_bfp.out19_30", "/transformer_blocks.15/attn/Concat_7_16_15.out18_1_15_bfp.out19_31", "/transformer_blocks.15/attn/Concat_8_3d.out18_1_15_bfp.out23_15" ], "const_args": [], "out_args": [ "/transformer_blocks.15/attn/Reshape_6_output_0.out18_1_15_bfp.out27_0_28" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.15/attn/Reshape_6_output_0.out18_1_15_bfp.out27_0_28" ], "const_args": [ "onnx::MatMul_12546" ], "out_args": [ "/transformer_blocks.15/attn/to_out.0/Add_output_0.out6_1_30_bfp.out7_30" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.15/attn/to_out.0/Add_output_0.out6_1_30_bfp.out7_30", "/Add_14_output_0.out_35_1_18" ], "const_args": [ "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_2_gma" ], "out_args": [ "/transformer_blocks.15/Add_output_0.out10_30" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.15/Add_output_0.out10_30" ], "const_args": [ "/transformer_blocks.15/norm2/Constant_output_0", "/transformer_blocks.15/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.15/norm2/LayerNormalization_output_0.out14_62_bfp.out15_62" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.15/norm2/LayerNormalization_output_0.out14_62_bfp.out15_62" ], "const_args": [ "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_3_existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_4" ], "out_args": [ "/transformer_blocks.15/Add_2_output_0.out0_0_75_bfp.out1_31" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.15/Add_2_output_0.out0_0_75_bfp.out1_31" ], "const_args": [ "onnx::MatMul_12547" ], "out_args": [ "/transformer_blocks.15/ff/net.0/Mul_5_output_0.out17_2_30_bfp.out25_91" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.15/ff/net.0/Mul_5_output_0.out17_2_30_bfp.out25_91" ], "const_args": [ "onnx::MatMul_12548" ], "out_args": [ "/transformer_blocks.15/ff/net.2/Add_output_0.out17_3_93_bfp.out25_93" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.15/ff/net.2/Add_output_0.out17_3_93_bfp.out25_93", "/transformer_blocks.15/Add_output_0.out10_30" ], "const_args": [ "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_5_gma" ], "out_args": [ "/transformer_blocks.15/Add_3_output_0.out10_31" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_15", "type": "SDAdd", "in_args": [ "/transformer_blocks.15/Add_3_output_0.out10_31", "block_controlnet_hidden_states_7.out_35_1_18" ], "const_args": [], "out_args": [ "/Add_15_output_0.out_35_1_19" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.15/attn/Reshape_6_output_0.out18_1_15_bfp.out27_0_28" ], "const_args": [ "onnx::MatMul_12545" ], "out_args": [ "/transformer_blocks.15/attn/to_add_out/Add_output_0.out6_1_31_bfp.out7_31" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.15/attn/to_add_out/Add_output_0.out6_1_31_bfp.out7_31", "/transformer_blocks.14/Add_7_output_0.out10_29" ], "const_args": [ "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_2_gma" ], "out_args": [ "/transformer_blocks.15/Add_4_output_0.out10_32" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.15/Add_4_output_0.out10_32" ], "const_args": [ "/transformer_blocks.15/norm2_context/Constant_output_0", "/transformer_blocks.15/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.15/norm2_context/LayerNormalization_output_0.out14_63_bfp.out15_63" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.15/norm2_context/LayerNormalization_output_0.out14_63_bfp.out15_63" ], "const_args": [ "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_3_existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_4" ], "out_args": [ "/transformer_blocks.15/Add_6_output_0.out0_0_76_bfp.out1_33" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.15/Add_6_output_0.out0_0_76_bfp.out1_33" ], "const_args": [ "onnx::MatMul_12549" ], "out_args": [ "/transformer_blocks.15/ff_context/net.0/Mul_5_output_0.out17_2_31_bfp.out25_92" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.15/ff_context/net.0/Mul_5_output_0.out17_2_31_bfp.out25_92" ], "const_args": [ "onnx::MatMul_12550" ], "out_args": [ "/transformer_blocks.15/ff_context/net.2/Add_output_0.out17_3_94_bfp.out25_94" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.15/ff_context/net.2/Add_output_0.out17_3_94_bfp.out25_94", "/transformer_blocks.15/Add_4_output_0.out10_32" ], "const_args": [ "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_5_gma" ], "out_args": [ "/transformer_blocks.15/Add_7_output_0.out10_33" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_15_output_0.out_35_1_19" ], "const_args": [ "/transformer_blocks.16/norm1/norm/Constant_output_0", "/transformer_blocks.16/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.16/norm1/norm/LayerNormalization_output_0.out14_65_bfp.out15_65" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.16/norm1/norm/LayerNormalization_output_0.out14_65_bfp.out15_65" ], "const_args": [ "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_0_existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_1" ], "out_args": [ "/transformer_blocks.16/norm1/Add_2_output_0.out0_0_78_bfp.out1_34" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.15/Add_7_output_0.out10_33" ], "const_args": [ "/transformer_blocks.16/norm1_context/norm/Constant_output_0", "/transformer_blocks.16/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.16/norm1_context/norm/LayerNormalization_output_0.out14_64_bfp.out15_64" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.16/norm1_context/norm/LayerNormalization_output_0.out14_64_bfp.out15_64" ], "const_args": [ "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_0_existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_1" ], "out_args": [ "/transformer_blocks.16/norm1_context/Add_2_output_0.out0_0_77_bfp.out1_36" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/attn/Concat_6_16_16", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.16/norm1_context/Add_2_output_0.out0_0_77_bfp.out1_36", "/transformer_blocks.16/norm1/Add_2_output_0.out0_0_78_bfp.out1_34" ], "const_args": [ "onnx::MatMul_12566_onnx::MatMul_12551" ], "out_args": [ "/transformer_blocks.16/attn/Concat_6_output_0_16_16.out18_1_16_bfp.out19_32" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/attn/Concat_7_16_16", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.16/norm1_context/Add_2_output_0.out0_0_77_bfp.out1_36", "/transformer_blocks.16/norm1/Add_2_output_0.out0_0_78_bfp.out1_34" ], "const_args": [ "onnx::MatMul_12567_onnx::MatMul_12552" ], "out_args": [ "/transformer_blocks.16/attn/Concat_7_16_16.out18_1_16_bfp.out19_33" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.16/norm1_context/Add_2_output_0.out0_0_77_bfp.out1_36", "/transformer_blocks.16/norm1/Add_2_output_0.out0_0_78_bfp.out1_34" ], "const_args": [ "onnx::MatMul_12568_onnx::MatMul_12553" ], "out_args": [ "/transformer_blocks.16/attn/Concat_8_3d.out18_1_16_bfp.out23_16" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/attn/MatMul_16_16mha_18_0_29", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.16/attn/Concat_6_output_0_16_16.out18_1_16_bfp.out19_32", "/transformer_blocks.16/attn/Concat_7_16_16.out18_1_16_bfp.out19_33", "/transformer_blocks.16/attn/Concat_8_3d.out18_1_16_bfp.out23_16" ], "const_args": [], "out_args": [ "/transformer_blocks.16/attn/Reshape_6_output_0.out18_1_16_bfp.out27_0_29" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.16/attn/Reshape_6_output_0.out18_1_16_bfp.out27_0_29" ], "const_args": [ "onnx::MatMul_12577" ], "out_args": [ "/transformer_blocks.16/attn/to_out.0/Add_output_0.out6_1_32_bfp.out7_32" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.16/attn/to_out.0/Add_output_0.out6_1_32_bfp.out7_32", "/Add_15_output_0.out_35_1_19" ], "const_args": [ "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_2_gma" ], "out_args": [ "/transformer_blocks.16/Add_output_0.out10_34" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.16/Add_output_0.out10_34" ], "const_args": [ "/transformer_blocks.16/norm2/Constant_output_0", "/transformer_blocks.16/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.16/norm2/LayerNormalization_output_0.out14_66_bfp.out15_66" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.16/norm2/LayerNormalization_output_0.out14_66_bfp.out15_66" ], "const_args": [ "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_3_existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_4" ], "out_args": [ "/transformer_blocks.16/Add_2_output_0.out0_0_79_bfp.out1_35" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.16/Add_2_output_0.out0_0_79_bfp.out1_35" ], "const_args": [ "onnx::MatMul_12578" ], "out_args": [ "/transformer_blocks.16/ff/net.0/Mul_5_output_0.out17_2_32_bfp.out25_95" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.16/ff/net.0/Mul_5_output_0.out17_2_32_bfp.out25_95" ], "const_args": [ "onnx::MatMul_12579" ], "out_args": [ "/transformer_blocks.16/ff/net.2/Add_output_0.out17_3_97_bfp.out25_97" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.16/ff/net.2/Add_output_0.out17_3_97_bfp.out25_97", "/transformer_blocks.16/Add_output_0.out10_34" ], "const_args": [ "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_5_gma" ], "out_args": [ "/transformer_blocks.16/Add_3_output_0.out10_35" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.16/attn/Reshape_6_output_0.out18_1_16_bfp.out27_0_29" ], "const_args": [ "onnx::MatMul_12576" ], "out_args": [ "/transformer_blocks.16/attn/to_add_out/Add_output_0.out6_1_33_bfp.out7_33" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.16/attn/to_add_out/Add_output_0.out6_1_33_bfp.out7_33", "/transformer_blocks.15/Add_7_output_0.out10_33" ], "const_args": [ "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_2_gma" ], "out_args": [ "/transformer_blocks.16/Add_4_output_0.out10_36" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.16/Add_4_output_0.out10_36" ], "const_args": [ "/transformer_blocks.16/norm2_context/Constant_output_0", "/transformer_blocks.16/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.16/norm2_context/LayerNormalization_output_0.out14_67_bfp.out15_67" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.16/norm2_context/LayerNormalization_output_0.out14_67_bfp.out15_67" ], "const_args": [ "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_3_existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_4" ], "out_args": [ "/transformer_blocks.16/Add_6_output_0.out0_0_80_bfp.out1_37" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.16/Add_6_output_0.out0_0_80_bfp.out1_37" ], "const_args": [ "onnx::MatMul_12580" ], "out_args": [ "/transformer_blocks.16/ff_context/net.0/Mul_5_output_0.out17_2_33_bfp.out25_96" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.16/ff_context/net.0/Mul_5_output_0.out17_2_33_bfp.out25_96" ], "const_args": [ "onnx::MatMul_12581" ], "out_args": [ "/transformer_blocks.16/ff_context/net.2/Add_output_0.out17_3_98_bfp.out25_98" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.16/ff_context/net.2/Add_output_0.out17_3_98_bfp.out25_98", "/transformer_blocks.16/Add_4_output_0.out10_36" ], "const_args": [ "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_5_gma" ], "out_args": [ "/transformer_blocks.16/Add_7_output_0.out10_37" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.16/Add_7_output_0.out10_37" ], "const_args": [ "/transformer_blocks.17/norm1_context/norm/Constant_output_0", "/transformer_blocks.17/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.17/norm1_context/norm/LayerNormalization_output_0.out14_68_bfp.out15_68" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_16", "type": "SDAdd", "in_args": [ "/transformer_blocks.16/Add_3_output_0.out10_35", "block_controlnet_hidden_states_8.out_35_1_20" ], "const_args": [], "out_args": [ "/Add_16_output_0.out_35_1_20" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_16_output_0.out_35_1_20" ], "const_args": [ "/transformer_blocks.17/norm1/norm/Constant_output_0", "/transformer_blocks.17/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.17/norm1/norm/LayerNormalization_output_0.out14_69_bfp.out15_69" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.17/norm1/norm/LayerNormalization_output_0.out14_69_bfp.out15_69" ], "const_args": [ "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_0_existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_1" ], "out_args": [ "/transformer_blocks.17/norm1/Add_2_output_0.out0_0_82_bfp.out1_38" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.17/norm1_context/norm/LayerNormalization_output_0.out14_68_bfp.out15_68" ], "const_args": [ "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_0_existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_1" ], "out_args": [ "/transformer_blocks.17/norm1_context/Add_2_output_0.out0_0_81_bfp.out1_40" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/attn/Concat_6_16_17", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.17/norm1_context/Add_2_output_0.out0_0_81_bfp.out1_40", "/transformer_blocks.17/norm1/Add_2_output_0.out0_0_82_bfp.out1_38" ], "const_args": [ "onnx::MatMul_12597_onnx::MatMul_12582" ], "out_args": [ "/transformer_blocks.17/attn/Concat_6_output_0_16_17.out18_1_17_bfp.out19_34" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/attn/Concat_7_16_17", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.17/norm1_context/Add_2_output_0.out0_0_81_bfp.out1_40", "/transformer_blocks.17/norm1/Add_2_output_0.out0_0_82_bfp.out1_38" ], "const_args": [ "onnx::MatMul_12598_onnx::MatMul_12583" ], "out_args": [ "/transformer_blocks.17/attn/Concat_7_16_17.out18_1_17_bfp.out19_35" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.17/norm1_context/Add_2_output_0.out0_0_81_bfp.out1_40", "/transformer_blocks.17/norm1/Add_2_output_0.out0_0_82_bfp.out1_38" ], "const_args": [ "onnx::MatMul_12599_onnx::MatMul_12584" ], "out_args": [ "/transformer_blocks.17/attn/Concat_8_3d.out18_1_17_bfp.out23_17" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/attn/MatMul_16_17mha_18_0_30", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.17/attn/Concat_6_output_0_16_17.out18_1_17_bfp.out19_34", "/transformer_blocks.17/attn/Concat_7_16_17.out18_1_17_bfp.out19_35", "/transformer_blocks.17/attn/Concat_8_3d.out18_1_17_bfp.out23_17" ], "const_args": [], "out_args": [ "/transformer_blocks.17/attn/Reshape_6_output_0.out18_1_17_bfp.out27_0_30" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.17/attn/Reshape_6_output_0.out18_1_17_bfp.out27_0_30" ], "const_args": [ "onnx::MatMul_12608" ], "out_args": [ "/transformer_blocks.17/attn/to_out.0/Add_output_0.out6_1_34_bfp.out7_34" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.17/attn/to_out.0/Add_output_0.out6_1_34_bfp.out7_34", "/Add_16_output_0.out_35_1_20" ], "const_args": [ "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_2_gma" ], "out_args": [ "/transformer_blocks.17/Add_output_0.out10_38" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.17/Add_output_0.out10_38" ], "const_args": [ "/transformer_blocks.17/norm2/Constant_output_0", "/transformer_blocks.17/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.17/norm2/LayerNormalization_output_0.out14_70_bfp.out15_70" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.17/norm2/LayerNormalization_output_0.out14_70_bfp.out15_70" ], "const_args": [ "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_3_existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_4" ], "out_args": [ "/transformer_blocks.17/Add_2_output_0.out0_0_83_bfp.out1_39" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.17/Add_2_output_0.out0_0_83_bfp.out1_39" ], "const_args": [ "onnx::MatMul_12609" ], "out_args": [ "/transformer_blocks.17/ff/net.0/Mul_5_output_0.out17_2_34_bfp.out25_99" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.17/ff/net.0/Mul_5_output_0.out17_2_34_bfp.out25_99" ], "const_args": [ "onnx::MatMul_12610" ], "out_args": [ "/transformer_blocks.17/ff/net.2/Add_output_0.out17_3_101_bfp.out25_101" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.17/ff/net.2/Add_output_0.out17_3_101_bfp.out25_101", "/transformer_blocks.17/Add_output_0.out10_38" ], "const_args": [ "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_5_gma" ], "out_args": [ "/transformer_blocks.17/Add_3_output_0.out10_39" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_17", "type": "SDAdd", "in_args": [ "/transformer_blocks.17/Add_3_output_0.out10_39", "block_controlnet_hidden_states_8.out_35_1_20" ], "const_args": [], "out_args": [ "/Add_17_output_0.out_35_1_21" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.17/attn/Reshape_6_output_0.out18_1_17_bfp.out27_0_30" ], "const_args": [ "onnx::MatMul_12607" ], "out_args": [ "/transformer_blocks.17/attn/to_add_out/Add_output_0.out6_1_35_bfp.out7_35" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.17/attn/to_add_out/Add_output_0.out6_1_35_bfp.out7_35", "/transformer_blocks.16/Add_7_output_0.out10_37" ], "const_args": [ "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_2_gma" ], "out_args": [ "/transformer_blocks.17/Add_4_output_0.out10_40" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.17/Add_4_output_0.out10_40" ], "const_args": [ "/transformer_blocks.17/norm2_context/Constant_output_0", "/transformer_blocks.17/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.17/norm2_context/LayerNormalization_output_0.out14_71_bfp.out15_71" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.17/norm2_context/LayerNormalization_output_0.out14_71_bfp.out15_71" ], "const_args": [ "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_3_existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_4" ], "out_args": [ "/transformer_blocks.17/Add_6_output_0.out0_0_84_bfp.out1_41" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.17/Add_6_output_0.out0_0_84_bfp.out1_41" ], "const_args": [ "onnx::MatMul_12611" ], "out_args": [ "/transformer_blocks.17/ff_context/net.0/Mul_5_output_0.out17_2_35_bfp.out25_100" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.17/ff_context/net.0/Mul_5_output_0.out17_2_35_bfp.out25_100" ], "const_args": [ "onnx::MatMul_12612" ], "out_args": [ "/transformer_blocks.17/ff_context/net.2/Add_output_0.out17_3_102_bfp.out25_102" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.17/ff_context/net.2/Add_output_0.out17_3_102_bfp.out25_102", "/transformer_blocks.17/Add_4_output_0.out10_40" ], "const_args": [ "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_5_gma" ], "out_args": [ "/transformer_blocks.17/Add_7_output_0.out10_41" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_17_output_0.out_35_1_21" ], "const_args": [ "/transformer_blocks.18/norm1/norm/Constant_output_0", "/transformer_blocks.18/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.18/norm1/norm/LayerNormalization_output_0.out14_73_bfp.out15_73" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.18/norm1/norm/LayerNormalization_output_0.out14_73_bfp.out15_73" ], "const_args": [ "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_0_existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_1" ], "out_args": [ "/transformer_blocks.18/norm1/Add_2_output_0.out0_0_86_bfp.out1_42" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.17/Add_7_output_0.out10_41" ], "const_args": [ "/transformer_blocks.18/norm1_context/norm/Constant_output_0", "/transformer_blocks.18/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.18/norm1_context/norm/LayerNormalization_output_0.out14_72_bfp.out15_72" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.18/norm1_context/norm/LayerNormalization_output_0.out14_72_bfp.out15_72" ], "const_args": [ "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_0_existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_1" ], "out_args": [ "/transformer_blocks.18/norm1_context/Add_2_output_0.out0_0_85_bfp.out1_44" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/attn/Concat_6_16_18", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.18/norm1_context/Add_2_output_0.out0_0_85_bfp.out1_44", "/transformer_blocks.18/norm1/Add_2_output_0.out0_0_86_bfp.out1_42" ], "const_args": [ "onnx::MatMul_12628_onnx::MatMul_12613" ], "out_args": [ "/transformer_blocks.18/attn/Concat_6_output_0_16_18.out18_1_18_bfp.out19_36" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/attn/Concat_7_16_18", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.18/norm1_context/Add_2_output_0.out0_0_85_bfp.out1_44", "/transformer_blocks.18/norm1/Add_2_output_0.out0_0_86_bfp.out1_42" ], "const_args": [ "onnx::MatMul_12629_onnx::MatMul_12614" ], "out_args": [ "/transformer_blocks.18/attn/Concat_7_16_18.out18_1_18_bfp.out19_37" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.18/norm1_context/Add_2_output_0.out0_0_85_bfp.out1_44", "/transformer_blocks.18/norm1/Add_2_output_0.out0_0_86_bfp.out1_42" ], "const_args": [ "onnx::MatMul_12630_onnx::MatMul_12615" ], "out_args": [ "/transformer_blocks.18/attn/Concat_8_3d.out18_1_18_bfp.out23_18" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/attn/MatMul_16_18mha_18_0_31", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.18/attn/Concat_6_output_0_16_18.out18_1_18_bfp.out19_36", "/transformer_blocks.18/attn/Concat_7_16_18.out18_1_18_bfp.out19_37", "/transformer_blocks.18/attn/Concat_8_3d.out18_1_18_bfp.out23_18" ], "const_args": [], "out_args": [ "/transformer_blocks.18/attn/Reshape_6_output_0.out18_1_18_bfp.out27_0_31" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.18/attn/Reshape_6_output_0.out18_1_18_bfp.out27_0_31" ], "const_args": [ "onnx::MatMul_12639" ], "out_args": [ "/transformer_blocks.18/attn/to_out.0/Add_output_0.out6_1_36_bfp.out7_36" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.18/attn/to_out.0/Add_output_0.out6_1_36_bfp.out7_36", "/Add_17_output_0.out_35_1_21" ], "const_args": [ "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_2_gma" ], "out_args": [ "/transformer_blocks.18/Add_output_0.out10_42" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.18/Add_output_0.out10_42" ], "const_args": [ "/transformer_blocks.18/norm2/Constant_output_0", "/transformer_blocks.18/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.18/norm2/LayerNormalization_output_0.out14_74_bfp.out15_74" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.18/norm2/LayerNormalization_output_0.out14_74_bfp.out15_74" ], "const_args": [ "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_3_existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_4" ], "out_args": [ "/transformer_blocks.18/Add_2_output_0.out0_0_87_bfp.out1_43" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.18/Add_2_output_0.out0_0_87_bfp.out1_43" ], "const_args": [ "onnx::MatMul_12640" ], "out_args": [ "/transformer_blocks.18/ff/net.0/Mul_5_output_0.out17_2_36_bfp.out25_103" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.18/ff/net.0/Mul_5_output_0.out17_2_36_bfp.out25_103" ], "const_args": [ "onnx::MatMul_12641" ], "out_args": [ "/transformer_blocks.18/ff/net.2/Add_output_0.out17_3_105_bfp.out25_105" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.18/ff/net.2/Add_output_0.out17_3_105_bfp.out25_105", "/transformer_blocks.18/Add_output_0.out10_42" ], "const_args": [ "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_5_gma" ], "out_args": [ "/transformer_blocks.18/Add_3_output_0.out10_43" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.18/attn/Reshape_6_output_0.out18_1_18_bfp.out27_0_31" ], "const_args": [ "onnx::MatMul_12638" ], "out_args": [ "/transformer_blocks.18/attn/to_add_out/Add_output_0.out6_1_37_bfp.out7_37" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.18/attn/to_add_out/Add_output_0.out6_1_37_bfp.out7_37", "/transformer_blocks.17/Add_7_output_0.out10_41" ], "const_args": [ "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_2_gma" ], "out_args": [ "/transformer_blocks.18/Add_4_output_0.out10_44" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.18/Add_4_output_0.out10_44" ], "const_args": [ "/transformer_blocks.18/norm2_context/Constant_output_0", "/transformer_blocks.18/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.18/norm2_context/LayerNormalization_output_0.out14_75_bfp.out15_75" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.18/norm2_context/LayerNormalization_output_0.out14_75_bfp.out15_75" ], "const_args": [ "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_3_existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_4" ], "out_args": [ "/transformer_blocks.18/Add_6_output_0.out0_0_88_bfp.out1_45" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.18/Add_6_output_0.out0_0_88_bfp.out1_45" ], "const_args": [ "onnx::MatMul_12642" ], "out_args": [ "/transformer_blocks.18/ff_context/net.0/Mul_5_output_0.out17_2_37_bfp.out25_104" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.18/ff_context/net.0/Mul_5_output_0.out17_2_37_bfp.out25_104" ], "const_args": [ "onnx::MatMul_12643" ], "out_args": [ "/transformer_blocks.18/ff_context/net.2/Add_output_0.out17_3_106_bfp.out25_106" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.18/ff_context/net.2/Add_output_0.out17_3_106_bfp.out25_106", "/transformer_blocks.18/Add_4_output_0.out10_44" ], "const_args": [ "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_5_gma" ], "out_args": [ "/transformer_blocks.18/Add_7_output_0.out10_45" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.18/Add_7_output_0.out10_45" ], "const_args": [ "/transformer_blocks.19/norm1_context/norm/Constant_output_0", "/transformer_blocks.19/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.19/norm1_context/norm/LayerNormalization_output_0.out14_76_bfp.out15_76" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_18", "type": "SDAdd", "in_args": [ "/transformer_blocks.18/Add_3_output_0.out10_43", "block_controlnet_hidden_states_9.out_35_1_22" ], "const_args": [], "out_args": [ "/Add_18_output_0.out_35_1_22" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_18_output_0.out_35_1_22" ], "const_args": [ "/transformer_blocks.19/norm1/norm/Constant_output_0", "/transformer_blocks.19/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.19/norm1/norm/LayerNormalization_output_0.out14_77_bfp.out15_77" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.19/norm1/norm/LayerNormalization_output_0.out14_77_bfp.out15_77" ], "const_args": [ "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_0_existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_1" ], "out_args": [ "/transformer_blocks.19/norm1/Add_2_output_0.out0_0_90_bfp.out1_46" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.19/norm1_context/norm/LayerNormalization_output_0.out14_76_bfp.out15_76" ], "const_args": [ "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_0_existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_1" ], "out_args": [ "/transformer_blocks.19/norm1_context/Add_2_output_0.out0_0_89_bfp.out1_48" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/attn/Concat_6_16_19", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.19/norm1_context/Add_2_output_0.out0_0_89_bfp.out1_48", "/transformer_blocks.19/norm1/Add_2_output_0.out0_0_90_bfp.out1_46" ], "const_args": [ "onnx::MatMul_12659_onnx::MatMul_12644" ], "out_args": [ "/transformer_blocks.19/attn/Concat_6_output_0_16_19.out18_1_19_bfp.out19_38" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/attn/Concat_7_16_19", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.19/norm1_context/Add_2_output_0.out0_0_89_bfp.out1_48", "/transformer_blocks.19/norm1/Add_2_output_0.out0_0_90_bfp.out1_46" ], "const_args": [ "onnx::MatMul_12660_onnx::MatMul_12645" ], "out_args": [ "/transformer_blocks.19/attn/Concat_7_16_19.out18_1_19_bfp.out19_39" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.19/norm1_context/Add_2_output_0.out0_0_89_bfp.out1_48", "/transformer_blocks.19/norm1/Add_2_output_0.out0_0_90_bfp.out1_46" ], "const_args": [ "onnx::MatMul_12661_onnx::MatMul_12646" ], "out_args": [ "/transformer_blocks.19/attn/Concat_8_3d.out18_1_19_bfp.out23_19" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/attn/MatMul_16_19mha_18_0_32", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.19/attn/Concat_6_output_0_16_19.out18_1_19_bfp.out19_38", "/transformer_blocks.19/attn/Concat_7_16_19.out18_1_19_bfp.out19_39", "/transformer_blocks.19/attn/Concat_8_3d.out18_1_19_bfp.out23_19" ], "const_args": [], "out_args": [ "/transformer_blocks.19/attn/Reshape_6_output_0.out18_1_19_bfp.out27_0_32" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.19/attn/Reshape_6_output_0.out18_1_19_bfp.out27_0_32" ], "const_args": [ "onnx::MatMul_12670" ], "out_args": [ "/transformer_blocks.19/attn/to_out.0/Add_output_0.out6_1_38_bfp.out7_38" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.19/attn/to_out.0/Add_output_0.out6_1_38_bfp.out7_38", "/Add_18_output_0.out_35_1_22" ], "const_args": [ "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_2_gma" ], "out_args": [ "/transformer_blocks.19/Add_output_0.out10_46" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.19/Add_output_0.out10_46" ], "const_args": [ "/transformer_blocks.19/norm2/Constant_output_0", "/transformer_blocks.19/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.19/norm2/LayerNormalization_output_0.out14_78_bfp.out15_78" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.19/norm2/LayerNormalization_output_0.out14_78_bfp.out15_78" ], "const_args": [ "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_3_existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_4" ], "out_args": [ "/transformer_blocks.19/Add_2_output_0.out0_0_91_bfp.out1_47" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.19/Add_2_output_0.out0_0_91_bfp.out1_47" ], "const_args": [ "onnx::MatMul_12671" ], "out_args": [ "/transformer_blocks.19/ff/net.0/Mul_5_output_0.out17_2_38_bfp.out25_107" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.19/ff/net.0/Mul_5_output_0.out17_2_38_bfp.out25_107" ], "const_args": [ "onnx::MatMul_12672" ], "out_args": [ "/transformer_blocks.19/ff/net.2/Add_output_0.out17_3_109_bfp.out25_109" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.19/ff/net.2/Add_output_0.out17_3_109_bfp.out25_109", "/transformer_blocks.19/Add_output_0.out10_46" ], "const_args": [ "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_5_gma" ], "out_args": [ "/transformer_blocks.19/Add_3_output_0.out10_47" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_19", "type": "SDAdd", "in_args": [ "/transformer_blocks.19/Add_3_output_0.out10_47", "block_controlnet_hidden_states_9.out_35_1_22" ], "const_args": [], "out_args": [ "/Add_19_output_0.out_35_1_23" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.19/attn/Reshape_6_output_0.out18_1_19_bfp.out27_0_32" ], "const_args": [ "onnx::MatMul_12669" ], "out_args": [ "/transformer_blocks.19/attn/to_add_out/Add_output_0.out6_1_39_bfp.out7_39" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.19/attn/to_add_out/Add_output_0.out6_1_39_bfp.out7_39", "/transformer_blocks.18/Add_7_output_0.out10_45" ], "const_args": [ "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_2_gma" ], "out_args": [ "/transformer_blocks.19/Add_4_output_0.out10_48" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.19/Add_4_output_0.out10_48" ], "const_args": [ "/transformer_blocks.19/norm2_context/Constant_output_0", "/transformer_blocks.19/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.19/norm2_context/LayerNormalization_output_0.out14_79_bfp.out15_79" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.19/norm2_context/LayerNormalization_output_0.out14_79_bfp.out15_79" ], "const_args": [ "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_3_existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_4" ], "out_args": [ "/transformer_blocks.19/Add_6_output_0.out0_0_92_bfp.out1_49" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.19/Add_6_output_0.out0_0_92_bfp.out1_49" ], "const_args": [ "onnx::MatMul_12673" ], "out_args": [ "/transformer_blocks.19/ff_context/net.0/Mul_5_output_0.out17_2_39_bfp.out25_108" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.19/ff_context/net.0/Mul_5_output_0.out17_2_39_bfp.out25_108" ], "const_args": [ "onnx::MatMul_12674" ], "out_args": [ "/transformer_blocks.19/ff_context/net.2/Add_output_0.out17_3_110_bfp.out25_110" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.19/ff_context/net.2/Add_output_0.out17_3_110_bfp.out25_110", "/transformer_blocks.19/Add_4_output_0.out10_48" ], "const_args": [ "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_5_gma" ], "out_args": [ "/transformer_blocks.19/Add_7_output_0.out10_49" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_19_output_0.out_35_1_23" ], "const_args": [ "/transformer_blocks.20/norm1/norm/Constant_output_0", "/transformer_blocks.20/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.20/norm1/norm/LayerNormalization_output_0.out14_81_bfp.out15_81" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.19/Add_7_output_0.out10_49" ], "const_args": [ "/transformer_blocks.20/norm1_context/norm/Constant_output_0", "/transformer_blocks.20/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.20/norm1_context/norm/LayerNormalization_output_0.out14_80_bfp.out15_80" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.20/norm1/norm/LayerNormalization_output_0.out14_81_bfp.out15_81" ], "const_args": [ "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_0_existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_1" ], "out_args": [ "/transformer_blocks.20/norm1/Add_2_output_0.out0_0_94_bfp.out1_55" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.20/norm1_context/norm/LayerNormalization_output_0.out14_80_bfp.out15_80" ], "const_args": [ "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_0_existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_1" ], "out_args": [ "/transformer_blocks.20/norm1_context/Add_2_output_0.out0_0_93_bfp.out1_57" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/attn/Concat_6_16_20", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.20/norm1_context/Add_2_output_0.out0_0_93_bfp.out1_57", "/transformer_blocks.20/norm1/Add_2_output_0.out0_0_94_bfp.out1_55" ], "const_args": [ "onnx::MatMul_12690_onnx::MatMul_12675" ], "out_args": [ "/transformer_blocks.20/attn/Concat_6_output_0_16_20.out18_1_20_bfp.out19_40" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/attn/Concat_7_16_20", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.20/norm1_context/Add_2_output_0.out0_0_93_bfp.out1_57", "/transformer_blocks.20/norm1/Add_2_output_0.out0_0_94_bfp.out1_55" ], "const_args": [ "onnx::MatMul_12691_onnx::MatMul_12676" ], "out_args": [ "/transformer_blocks.20/attn/Concat_7_16_20.out18_1_20_bfp.out19_41" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.20/norm1_context/Add_2_output_0.out0_0_93_bfp.out1_57", "/transformer_blocks.20/norm1/Add_2_output_0.out0_0_94_bfp.out1_55" ], "const_args": [ "onnx::MatMul_12692_onnx::MatMul_12677" ], "out_args": [ "/transformer_blocks.20/attn/Concat_8_3d.out18_1_20_bfp.out23_20" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/attn/MatMul_16_20mha_18_0_33", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.20/attn/Concat_6_output_0_16_20.out18_1_20_bfp.out19_40", "/transformer_blocks.20/attn/Concat_7_16_20.out18_1_20_bfp.out19_41", "/transformer_blocks.20/attn/Concat_8_3d.out18_1_20_bfp.out23_20" ], "const_args": [], "out_args": [ "/transformer_blocks.20/attn/Reshape_6_output_0.out18_1_20_bfp.out27_0_33" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.20/attn/Reshape_6_output_0.out18_1_20_bfp.out27_0_33" ], "const_args": [ "onnx::MatMul_12700" ], "out_args": [ "/transformer_blocks.20/attn/to_add_out/Add_output_0.out6_1_41_bfp.out7_41" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.20/attn/to_add_out/Add_output_0.out6_1_41_bfp.out7_41", "/transformer_blocks.19/Add_7_output_0.out10_49" ], "const_args": [ "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_2_gma" ], "out_args": [ "/transformer_blocks.20/Add_4_output_0.out10_57" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.20/attn/Reshape_6_output_0.out18_1_20_bfp.out27_0_33" ], "const_args": [ "onnx::MatMul_12701" ], "out_args": [ "/transformer_blocks.20/attn/to_out.0/Add_output_0.out6_1_40_bfp.out7_40" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.20/attn/to_out.0/Add_output_0.out6_1_40_bfp.out7_40", "/Add_19_output_0.out_35_1_23" ], "const_args": [ "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_2_gma" ], "out_args": [ "/transformer_blocks.20/Add_output_0.out10_55" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.20/Add_output_0.out10_55" ], "const_args": [ "/transformer_blocks.20/norm2/Constant_output_0", "/transformer_blocks.20/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.20/norm2/LayerNormalization_output_0.out14_82_bfp.out15_82" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.20/norm2/LayerNormalization_output_0.out14_82_bfp.out15_82" ], "const_args": [ "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_3_existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_4" ], "out_args": [ "/transformer_blocks.20/Add_2_output_0.out0_0_95_bfp.out1_56" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.20/Add_2_output_0.out0_0_95_bfp.out1_56" ], "const_args": [ "onnx::MatMul_12702" ], "out_args": [ "/transformer_blocks.20/ff/net.0/Mul_5_output_0.out17_2_40_bfp.out25_111" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.20/ff/net.0/Mul_5_output_0.out17_2_40_bfp.out25_111" ], "const_args": [ "onnx::MatMul_12703" ], "out_args": [ "/transformer_blocks.20/ff/net.2/Add_output_0.out17_3_113_bfp.out25_113" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.20/ff/net.2/Add_output_0.out17_3_113_bfp.out25_113", "/transformer_blocks.20/Add_output_0.out10_55" ], "const_args": [ "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_5_gma" ], "out_args": [ "/transformer_blocks.20/Add_3_output_0.out10_56" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.20/Add_4_output_0.out10_57" ], "const_args": [ "/transformer_blocks.20/norm2_context/Constant_output_0", "/transformer_blocks.20/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.20/norm2_context/LayerNormalization_output_0.out14_83_bfp.out15_83" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.20/norm2_context/LayerNormalization_output_0.out14_83_bfp.out15_83" ], "const_args": [ "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_3_existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_4" ], "out_args": [ "/transformer_blocks.20/Add_6_output_0.out0_0_96_bfp.out1_58" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.20/Add_6_output_0.out0_0_96_bfp.out1_58" ], "const_args": [ "onnx::MatMul_12704" ], "out_args": [ "/transformer_blocks.20/ff_context/net.0/Mul_5_output_0.out17_2_41_bfp.out25_112" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.20/ff_context/net.0/Mul_5_output_0.out17_2_41_bfp.out25_112" ], "const_args": [ "onnx::MatMul_12705" ], "out_args": [ "/transformer_blocks.20/ff_context/net.2/Add_output_0.out17_3_114_bfp.out25_114" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.20/ff_context/net.2/Add_output_0.out17_3_114_bfp.out25_114", "/transformer_blocks.20/Add_4_output_0.out10_57" ], "const_args": [ "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_5_gma" ], "out_args": [ "/transformer_blocks.20/Add_7_output_0.out10_58" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.20/Add_7_output_0.out10_58" ], "const_args": [ "/transformer_blocks.21/norm1_context/norm/Constant_output_0", "/transformer_blocks.21/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.21/norm1_context/norm/LayerNormalization_output_0.out14_84_bfp.out15_84" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_20", "type": "SDAdd", "in_args": [ "/transformer_blocks.20/Add_3_output_0.out10_56", "block_controlnet_hidden_states_10.out_35_1_24" ], "const_args": [], "out_args": [ "/Add_20_output_0.out_35_1_24" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_20_output_0.out_35_1_24" ], "const_args": [ "/transformer_blocks.21/norm1/norm/Constant_output_0", "/transformer_blocks.21/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.21/norm1/norm/LayerNormalization_output_0.out14_85_bfp.out15_85" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.21/norm1/norm/LayerNormalization_output_0.out14_85_bfp.out15_85" ], "const_args": [ "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_0_existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_1" ], "out_args": [ "/transformer_blocks.21/norm1/Add_2_output_0.out0_0_98_bfp.out1_59" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.21/norm1_context/norm/LayerNormalization_output_0.out14_84_bfp.out15_84" ], "const_args": [ "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_0_existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_1" ], "out_args": [ "/transformer_blocks.21/norm1_context/Add_2_output_0.out0_0_97_bfp.out1_61" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/attn/Concat_6_16_21", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.21/norm1_context/Add_2_output_0.out0_0_97_bfp.out1_61", "/transformer_blocks.21/norm1/Add_2_output_0.out0_0_98_bfp.out1_59" ], "const_args": [ "onnx::MatMul_12721_onnx::MatMul_12706" ], "out_args": [ "/transformer_blocks.21/attn/Concat_6_output_0_16_21.out18_1_21_bfp.out19_42" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/attn/Concat_7_16_21", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.21/norm1_context/Add_2_output_0.out0_0_97_bfp.out1_61", "/transformer_blocks.21/norm1/Add_2_output_0.out0_0_98_bfp.out1_59" ], "const_args": [ "onnx::MatMul_12722_onnx::MatMul_12707" ], "out_args": [ "/transformer_blocks.21/attn/Concat_7_16_21.out18_1_21_bfp.out19_43" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.21/norm1_context/Add_2_output_0.out0_0_97_bfp.out1_61", "/transformer_blocks.21/norm1/Add_2_output_0.out0_0_98_bfp.out1_59" ], "const_args": [ "onnx::MatMul_12723_onnx::MatMul_12708" ], "out_args": [ "/transformer_blocks.21/attn/Concat_8_3d.out18_1_21_bfp.out23_21" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/attn/MatMul_16_21mha_18_0_34", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.21/attn/Concat_6_output_0_16_21.out18_1_21_bfp.out19_42", "/transformer_blocks.21/attn/Concat_7_16_21.out18_1_21_bfp.out19_43", "/transformer_blocks.21/attn/Concat_8_3d.out18_1_21_bfp.out23_21" ], "const_args": [], "out_args": [ "/transformer_blocks.21/attn/Reshape_6_output_0.out18_1_21_bfp.out27_0_34" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.21/attn/Reshape_6_output_0.out18_1_21_bfp.out27_0_34" ], "const_args": [ "onnx::MatMul_12732" ], "out_args": [ "/transformer_blocks.21/attn/to_out.0/Add_output_0.out6_1_42_bfp.out7_42" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.21/attn/to_out.0/Add_output_0.out6_1_42_bfp.out7_42", "/Add_20_output_0.out_35_1_24" ], "const_args": [ "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_2_gma" ], "out_args": [ "/transformer_blocks.21/Add_output_0.out10_59" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.21/Add_output_0.out10_59" ], "const_args": [ "/transformer_blocks.21/norm2/Constant_output_0", "/transformer_blocks.21/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.21/norm2/LayerNormalization_output_0.out14_86_bfp.out15_86" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.21/norm2/LayerNormalization_output_0.out14_86_bfp.out15_86" ], "const_args": [ "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_3_existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_4" ], "out_args": [ "/transformer_blocks.21/Add_2_output_0.out0_0_99_bfp.out1_60" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.21/Add_2_output_0.out0_0_99_bfp.out1_60" ], "const_args": [ "onnx::MatMul_12733" ], "out_args": [ "/transformer_blocks.21/ff/net.0/Mul_5_output_0.out17_2_42_bfp.out25_115" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.21/ff/net.0/Mul_5_output_0.out17_2_42_bfp.out25_115" ], "const_args": [ "onnx::MatMul_12734" ], "out_args": [ "/transformer_blocks.21/ff/net.2/Add_output_0.out17_3_117_bfp.out25_117" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.21/ff/net.2/Add_output_0.out17_3_117_bfp.out25_117", "/transformer_blocks.21/Add_output_0.out10_59" ], "const_args": [ "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_5_gma" ], "out_args": [ "/transformer_blocks.21/Add_3_output_0.out10_60" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_21", "type": "SDAdd", "in_args": [ "/transformer_blocks.21/Add_3_output_0.out10_60", "block_controlnet_hidden_states_10.out_35_1_24" ], "const_args": [], "out_args": [ "/Add_21_output_0.out_35_1_25" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.21/attn/Reshape_6_output_0.out18_1_21_bfp.out27_0_34" ], "const_args": [ "onnx::MatMul_12731" ], "out_args": [ "/transformer_blocks.21/attn/to_add_out/Add_output_0.out6_1_43_bfp.out7_43" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.21/attn/to_add_out/Add_output_0.out6_1_43_bfp.out7_43", "/transformer_blocks.20/Add_7_output_0.out10_58" ], "const_args": [ "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_2_gma" ], "out_args": [ "/transformer_blocks.21/Add_4_output_0.out10_61" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.21/Add_4_output_0.out10_61" ], "const_args": [ "/transformer_blocks.21/norm2_context/Constant_output_0", "/transformer_blocks.21/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.21/norm2_context/LayerNormalization_output_0.out14_87_bfp.out15_87" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.21/norm2_context/LayerNormalization_output_0.out14_87_bfp.out15_87" ], "const_args": [ "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_3_existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_4" ], "out_args": [ "/transformer_blocks.21/Add_6_output_0.out0_0_100_bfp.out1_62" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.21/Add_6_output_0.out0_0_100_bfp.out1_62" ], "const_args": [ "onnx::MatMul_12735" ], "out_args": [ "/transformer_blocks.21/ff_context/net.0/Mul_5_output_0.out17_2_43_bfp.out25_116" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.21/ff_context/net.0/Mul_5_output_0.out17_2_43_bfp.out25_116" ], "const_args": [ "onnx::MatMul_12736" ], "out_args": [ "/transformer_blocks.21/ff_context/net.2/Add_output_0.out17_3_118_bfp.out25_118" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.21/ff_context/net.2/Add_output_0.out17_3_118_bfp.out25_118", "/transformer_blocks.21/Add_4_output_0.out10_61" ], "const_args": [ "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_5_gma" ], "out_args": [ "/transformer_blocks.21/Add_7_output_0.out10_62" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_21_output_0.out_35_1_25" ], "const_args": [ "/transformer_blocks.22/norm1/norm/Constant_output_0", "/transformer_blocks.22/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.22/norm1/norm/LayerNormalization_output_0.out14_89_bfp.out15_89" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.22/norm1/norm/LayerNormalization_output_0.out14_89_bfp.out15_89" ], "const_args": [ "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_0_existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_1" ], "out_args": [ "/transformer_blocks.22/norm1/Add_2_output_0.out0_0_102_bfp.out1_63" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.21/Add_7_output_0.out10_62" ], "const_args": [ "/transformer_blocks.22/norm1_context/norm/Constant_output_0", "/transformer_blocks.22/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.22/norm1_context/norm/LayerNormalization_output_0.out14_88_bfp.out15_88" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.22/norm1_context/norm/LayerNormalization_output_0.out14_88_bfp.out15_88" ], "const_args": [ "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_0_existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_1" ], "out_args": [ "/transformer_blocks.22/norm1_context/Add_2_output_0.out0_0_101_bfp.out1_65" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/attn/Concat_6_16_22", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.22/norm1_context/Add_2_output_0.out0_0_101_bfp.out1_65", "/transformer_blocks.22/norm1/Add_2_output_0.out0_0_102_bfp.out1_63" ], "const_args": [ "onnx::MatMul_12752_onnx::MatMul_12737" ], "out_args": [ "/transformer_blocks.22/attn/Concat_6_output_0_16_22.out18_1_22_bfp.out19_44" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/attn/Concat_7_16_22", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.22/norm1_context/Add_2_output_0.out0_0_101_bfp.out1_65", "/transformer_blocks.22/norm1/Add_2_output_0.out0_0_102_bfp.out1_63" ], "const_args": [ "onnx::MatMul_12753_onnx::MatMul_12738" ], "out_args": [ "/transformer_blocks.22/attn/Concat_7_16_22.out18_1_22_bfp.out19_45" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.22/norm1_context/Add_2_output_0.out0_0_101_bfp.out1_65", "/transformer_blocks.22/norm1/Add_2_output_0.out0_0_102_bfp.out1_63" ], "const_args": [ "onnx::MatMul_12754_onnx::MatMul_12739" ], "out_args": [ "/transformer_blocks.22/attn/Concat_8_3d.out18_1_22_bfp.out23_22" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/attn/MatMul_16_22mha_18_0_35", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.22/attn/Concat_6_output_0_16_22.out18_1_22_bfp.out19_44", "/transformer_blocks.22/attn/Concat_7_16_22.out18_1_22_bfp.out19_45", "/transformer_blocks.22/attn/Concat_8_3d.out18_1_22_bfp.out23_22" ], "const_args": [], "out_args": [ "/transformer_blocks.22/attn/Reshape_6_output_0.out18_1_22_bfp.out27_0_35" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.22/attn/Reshape_6_output_0.out18_1_22_bfp.out27_0_35" ], "const_args": [ "onnx::MatMul_12763" ], "out_args": [ "/transformer_blocks.22/attn/to_out.0/Add_output_0.out6_1_44_bfp.out7_44" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.22/attn/to_out.0/Add_output_0.out6_1_44_bfp.out7_44", "/Add_21_output_0.out_35_1_25" ], "const_args": [ "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_2_gma" ], "out_args": [ "/transformer_blocks.22/Add_output_0.out10_63" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.22/Add_output_0.out10_63" ], "const_args": [ "/transformer_blocks.22/norm2/Constant_output_0", "/transformer_blocks.22/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.22/norm2/LayerNormalization_output_0.out14_90_bfp.out15_90" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.22/norm2/LayerNormalization_output_0.out14_90_bfp.out15_90" ], "const_args": [ "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_3_existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_4" ], "out_args": [ "/transformer_blocks.22/Add_2_output_0.out0_0_103_bfp.out1_64" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.22/Add_2_output_0.out0_0_103_bfp.out1_64" ], "const_args": [ "onnx::MatMul_12764" ], "out_args": [ "/transformer_blocks.22/ff/net.0/Mul_5_output_0.out17_2_44_bfp.out25_119" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.22/ff/net.0/Mul_5_output_0.out17_2_44_bfp.out25_119" ], "const_args": [ "onnx::MatMul_12765" ], "out_args": [ "/transformer_blocks.22/ff/net.2/Add_output_0.out17_3_121_bfp.out25_121" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.22/ff/net.2/Add_output_0.out17_3_121_bfp.out25_121", "/transformer_blocks.22/Add_output_0.out10_63" ], "const_args": [ "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_5_gma" ], "out_args": [ "/transformer_blocks.22/Add_3_output_0.out10_64" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.22/attn/Reshape_6_output_0.out18_1_22_bfp.out27_0_35" ], "const_args": [ "onnx::MatMul_12762" ], "out_args": [ "/transformer_blocks.22/attn/to_add_out/Add_output_0.out6_1_45_bfp.out7_45" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.22/attn/to_add_out/Add_output_0.out6_1_45_bfp.out7_45", "/transformer_blocks.21/Add_7_output_0.out10_62" ], "const_args": [ "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_2_gma" ], "out_args": [ "/transformer_blocks.22/Add_4_output_0.out10_65" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.22/Add_4_output_0.out10_65" ], "const_args": [ "/transformer_blocks.22/norm2_context/Constant_output_0", "/transformer_blocks.22/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.22/norm2_context/LayerNormalization_output_0.out14_91_bfp.out15_91" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.22/norm2_context/LayerNormalization_output_0.out14_91_bfp.out15_91" ], "const_args": [ "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_3_existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_4" ], "out_args": [ "/transformer_blocks.22/Add_6_output_0.out0_0_104_bfp.out1_66" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.22/Add_6_output_0.out0_0_104_bfp.out1_66" ], "const_args": [ "onnx::MatMul_12766" ], "out_args": [ "/transformer_blocks.22/ff_context/net.0/Mul_5_output_0.out17_2_45_bfp.out25_120" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.22/ff_context/net.0/Mul_5_output_0.out17_2_45_bfp.out25_120" ], "const_args": [ "onnx::MatMul_12767" ], "out_args": [ "/transformer_blocks.22/ff_context/net.2/Add_output_0.out17_3_122_bfp.out25_122" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.22/ff_context/net.2/Add_output_0.out17_3_122_bfp.out25_122", "/transformer_blocks.22/Add_4_output_0.out10_65" ], "const_args": [ "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_5_gma" ], "out_args": [ "/transformer_blocks.22/Add_7_output_0.out10_66" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.22/Add_7_output_0.out10_66" ], "const_args": [ "/transformer_blocks.23/norm1_context/norm/Constant_output_0", "/transformer_blocks.23/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.23/norm1_context/norm/LayerNormalization_output_0.out14_92_bfp.out15_92" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_22", "type": "SDAdd", "in_args": [ "/transformer_blocks.22/Add_3_output_0.out10_64", "block_controlnet_hidden_states_11.out_35_1_26" ], "const_args": [], "out_args": [ "/Add_22_output_0.out_35_1_26" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_22_output_0.out_35_1_26" ], "const_args": [ "/transformer_blocks.23/norm1/norm/Constant_output_0", "/transformer_blocks.23/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.23/norm1/norm/LayerNormalization_output_0.out14_93_bfp.out15_93" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.23/norm1/norm/LayerNormalization_output_0.out14_93_bfp.out15_93" ], "const_args": [ "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_0_existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_1" ], "out_args": [ "/transformer_blocks.23/norm1/Add_2_output_0.out0_0_106_bfp.out1_67" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.23/norm1_context/norm/LayerNormalization_output_0.out14_92_bfp.out15_92" ], "const_args": [ "existing_model.transformer_blocks.23.norm1_context.linear.weight_5_1_51_27_47_0_existing_model.transformer_blocks.23.norm1_context.linear.weight_5_1_51_27_47_1" ], "out_args": [ "/transformer_blocks.23/norm1_context/Add_2_output_0.out0_0_105_bfp.out1_69" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/attn/Concat_6_16_23", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.23/norm1_context/Add_2_output_0.out0_0_105_bfp.out1_69", "/transformer_blocks.23/norm1/Add_2_output_0.out0_0_106_bfp.out1_67" ], "const_args": [ "onnx::MatMul_12783_onnx::MatMul_12768" ], "out_args": [ "/transformer_blocks.23/attn/Concat_6_output_0_16_23.out18_1_23_bfp.out19_46" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/attn/Concat_7_16_23", "type": "SDGemmRNConcat_bfp", "in_args": [ "/transformer_blocks.23/norm1_context/Add_2_output_0.out0_0_105_bfp.out1_69", "/transformer_blocks.23/norm1/Add_2_output_0.out0_0_106_bfp.out1_67" ], "const_args": [ "onnx::MatMul_12784_onnx::MatMul_12769" ], "out_args": [ "/transformer_blocks.23/attn/Concat_7_16_23.out18_1_23_bfp.out19_47" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "trans_head": { "type": "int", "value": [ "2" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "rmsnorm_axis": { "type": "int", "value": [ "-1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/attn/Concat_8_3d", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.23/norm1_context/Add_2_output_0.out0_0_105_bfp.out1_69", "/transformer_blocks.23/norm1/Add_2_output_0.out0_0_106_bfp.out1_67" ], "const_args": [ "onnx::MatMul_12785_onnx::MatMul_12770" ], "out_args": [ "/transformer_blocks.23/attn/Concat_8_3d.out18_1_23_bfp.out23_23" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "-2" ] }, "input_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_2", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/attn/MatMul_16_23mha_18_0_36", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.23/attn/Concat_6_output_0_16_23.out18_1_23_bfp.out19_46", "/transformer_blocks.23/attn/Concat_7_16_23.out18_1_23_bfp.out19_47", "/transformer_blocks.23/attn/Concat_8_3d.out18_1_23_bfp.out23_23" ], "const_args": [], "out_args": [ "/transformer_blocks.23/attn/Reshape_6_output_0.out18_1_23_bfp.out27_0_36" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1 + max_length", "64", "state_dim1 + max_length" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.23/attn/Reshape_6_output_0.out18_1_23_bfp.out27_0_36" ], "const_args": [ "onnx::MatMul_12790" ], "out_args": [ "/transformer_blocks.23/attn/to_out.0/Add_output_0.out6_1_46_bfp.out7_46" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_2", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.23/attn/to_out.0/Add_output_0.out6_1_46_bfp.out7_46", "/Add_22_output_0.out_35_1_26" ], "const_args": [ "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_2_gma" ], "out_args": [ "/transformer_blocks.23/Add_output_0.out10_67" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.23/Add_output_0.out10_67" ], "const_args": [ "/transformer_blocks.23/norm2/Constant_output_0", "/transformer_blocks.23/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.23/norm2/LayerNormalization_output_0.out14_94_bfp.out15_94" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.23/norm2/LayerNormalization_output_0.out14_94_bfp.out15_94" ], "const_args": [ "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_3_existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_4" ], "out_args": [ "/transformer_blocks.23/Add_2_output_0.out0_0_107_bfp.out1_68" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.23/Add_2_output_0.out0_0_107_bfp.out1_68" ], "const_args": [ "onnx::MatMul_12791" ], "out_args": [ "/transformer_blocks.23/ff/net.0/Mul_5_output_0.out17_2_46_bfp.out25_123" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.23/ff/net.0/Mul_5_output_0.out17_2_46_bfp.out25_123" ], "const_args": [ "onnx::MatMul_12792" ], "out_args": [ "/transformer_blocks.23/ff/net.2/Add_output_0.out17_3_125_bfp.out25_124" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.23/ff/net.2/Add_output_0.out17_3_125_bfp.out25_124", "/transformer_blocks.23/Add_output_0.out10_67" ], "const_args": [ "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_5_gma" ], "out_args": [ "/transformer_blocks.23/Add_3_output_0.out10_68" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/norm_out/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.23/Add_3_output_0.out10_68" ], "const_args": [ "/norm_out/norm/Constant_output_0", "/norm_out/norm/Constant_1_output_0" ], "out_args": [ "/norm_out/norm/LayerNormalization_output_0.out14_95_bfp.out15_95" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/norm_out/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/norm_out/norm/LayerNormalization_output_0.out14_95_bfp.out15_95" ], "const_args": [ "existing_model.norm_out.linear.weight_5_1_52_27_48_0_existing_model.norm_out.linear.weight_5_1_52_27_48_1" ], "out_args": [ "/norm_out/Add_2_output_0.out0_0_108_bfp.out1_105" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_2", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/norm_out/Add_2_output_0.out0_0_108_bfp.out1_105_SDCastBfp2Bf", "type": "SDCastBfp2Bf", "in_args": [ "/norm_out/Add_2_output_0.out0_0_108_bfp.out1_105" ], "const_args": [ "/norm_out/Add_2_output_0.out0_0_108_bfp.out1_105_bfp.wts" ], "out_args": [ "/norm_out/Add_2_output_0.out0_0_108" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/proj_out/MatMul", "type": "SDGemm", "in_args": [ "/norm_out/Add_2_output_0.out0_0_108" ], "const_args": [ "onnx::MatMul_12793" ], "out_args": [ "/Reshape_output_0.out17_0_122" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_2", "state_dim1", "64" ] }, "weight_shape": { "type": "int", "value": [ "1536", "64" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } } ], "fused_tensors": { "in": { "buffer_size": 86560, "xrt_arg_id": 0, "packed_tensors": [ "hidden_states_nhwc.out5_0_0", "/pos_embed/Reshape_1_output_0.out_35_1_2", "/time_text_embed/Cast_output_0.out17_3_3", "pooled_projections.out17_3_1", "encoder_hidden_states.out17_3_0", "block_controlnet_hidden_states_0.out_35_1_4", "block_controlnet_hidden_states_1.out_35_1_6", "block_controlnet_hidden_states_2.out_35_1_8", "block_controlnet_hidden_states_3.out_35_1_10", "block_controlnet_hidden_states_4.out_35_1_12", "block_controlnet_hidden_states_5.out_35_1_14", "block_controlnet_hidden_states_6.out_35_1_16", "block_controlnet_hidden_states_7.out_35_1_18", "block_controlnet_hidden_states_8.out_35_1_20", "block_controlnet_hidden_states_9.out_35_1_22", "block_controlnet_hidden_states_10.out_35_1_24", "block_controlnet_hidden_states_11.out_35_1_26" ] }, "out": { "buffer_size": 128, "xrt_arg_id": 1, "packed_tensors": [ "/Reshape_output_0.out17_0_122" ] }, "scratch": { "buffer_size": 1437184, "xrt_arg_id": 2, "packed_tensors": [ "/pos_embed/Transpose_output_0.out5_0_0", "/pos_embed/Add_2_output_0.out_35_1_2", "/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3", "/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1", "/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4", "/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1", "/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0", "/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2", "/time_text_embed/Add_output_0.out_35_1_3", "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "encoder_hidden_states.out17_3_0_bfp.out25_0", "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0", "/context_embedder/Add_output_0.out17_3_0", "/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0", "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0", "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1", "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_2_bfp.out1_106", "/transformer_blocks.0/attn/Concat_6_output_0_16_0.out18_1_0_bfp.out19_0", "/transformer_blocks.0/norm1/Add_4_output_0.out0_0_1_bfp.out1_107", "/transformer_blocks.0/attn/Concat_7_16_0.out18_1_0_bfp.out19_1", "/transformer_blocks.0/attn/Concat_8_3d.out18_1_0_bfp.out23_0", "/transformer_blocks.0/attn/Reshape_6_output_0.out18_1_0_bfp.out27_0_0", "/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1", "/transformer_blocks.0/Add_5_output_0.out10_0", "/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3", "/transformer_blocks.0/Add_7_output_0.out0_0_4_bfp.out1_1", "/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_8", "/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_10_bfp.out25_10", "/transformer_blocks.0/Add_8_output_0.out10_1", "/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0", "/transformer_blocks.0/Add_output_0.out10_104", "/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_4_bfp.out15_4", "/transformer_blocks.0/attn2/Reshape_1_output_0.out17_0_4_bfp.out21_1", "/transformer_blocks.0/attn2/Reshape_output_0.out17_0_5_bfp.out21_0", "/transformer_blocks.0/attn2/to_v/Add_output_0.out17_3_7_bfp.out25_5", "/transformer_blocks.0/attn2/Reshape_3_output_0.out20_0_bfp.out27_0_1", "/transformer_blocks.0/attn2/to_out.0/Add_output_0.out17_3_8_bfp.out25_6", "/transformer_blocks.0/Add_1_output_0.out10_105", "/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2", "/transformer_blocks.0/Add_3_output_0.out0_0_3_bfp.out1_108", "/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_7", "/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_9_bfp.out25_9", "/transformer_blocks.0/Add_4_output_0.out10_106", "/Add_output_0.out_35_1_4", "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_5_bfp.out15_5", "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_7_bfp.out1_2", "/transformer_blocks.1/norm1/Add_4_output_0.out0_0_6_bfp.out1_3", "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_5", "/transformer_blocks.1/attn/Concat_6_output_0_16_1.out18_1_1_bfp.out19_2", "/transformer_blocks.1/attn/Concat_7_16_1.out18_1_1_bfp.out19_3", "/transformer_blocks.1/attn/Concat_8_3d.out18_1_1_bfp.out23_1", "/transformer_blocks.1/attn/Reshape_6_output_0.out18_1_1_bfp.out27_0_2", "/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2", "/transformer_blocks.1/Add_output_0.out10_2", "/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3", "/transformer_blocks.1/attn2/Reshape_1_output_0.out17_0_10_bfp.out21_3", "/transformer_blocks.1/Add_5_output_0.out10_5", "/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7", "/transformer_blocks.1/Add_7_output_0.out0_0_9_bfp.out1_6", "/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_14", "/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_16_bfp.out25_16", "/transformer_blocks.1/Add_8_output_0.out10_6", "/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_8_bfp.out15_8", "/transformer_blocks.1/attn2/Reshape_output_0.out17_0_11_bfp.out21_2", "/transformer_blocks.1/attn2/to_v/Add_output_0.out17_3_13_bfp.out25_11", "/transformer_blocks.1/attn2/Reshape_3_output_0.out20_1_bfp.out27_0_3", "/transformer_blocks.1/attn2/to_out.0/Add_output_0.out17_3_14_bfp.out25_12", "/transformer_blocks.1/Add_1_output_0.out10_3", "/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6", "/transformer_blocks.1/Add_3_output_0.out0_0_8_bfp.out1_4", "/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_13", "/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_15_bfp.out25_15", "/transformer_blocks.1/Add_4_output_0.out10_4", "/Add_1_output_0.out_35_1_5", "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_9_bfp.out15_9", "/transformer_blocks.2/norm1/Add_4_output_0.out0_0_11_bfp.out1_51", "/transformer_blocks.2/attn2/Reshape_1_output_0.out17_0_16_bfp.out21_5", "/transformer_blocks.2/attn2/Reshape_output_0.out17_0_17_bfp.out21_4", "/transformer_blocks.2/attn2/to_v/Add_output_0.out17_3_19_bfp.out25_17", "/transformer_blocks.2/attn2/Reshape_3_output_0.out20_2_bfp.out27_0_5", "/transformer_blocks.2/attn2/to_out.0/Add_output_0.out17_3_20_bfp.out25_18", "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_12_bfp.out1_50", "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_10_bfp.out1_53", "/transformer_blocks.2/attn/Concat_6_output_0_16_2.out18_1_2_bfp.out19_4", "/transformer_blocks.2/attn/Concat_7_16_2.out18_1_2_bfp.out19_5", "/transformer_blocks.2/attn/Concat_8_3d.out18_1_2_bfp.out23_2", "/transformer_blocks.2/attn/Reshape_6_output_0.out18_1_2_bfp.out27_0_4", "/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5", "/transformer_blocks.2/Add_5_output_0.out10_53", "/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4", "/transformer_blocks.2/Add_output_0.out10_50", "/transformer_blocks.2/Add_1_output_0.out10_51", "/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10", "/transformer_blocks.2/Add_3_output_0.out0_0_13_bfp.out1_52", "/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_19", "/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_21_bfp.out25_21", "/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11", "/transformer_blocks.2/Add_7_output_0.out0_0_14_bfp.out1_54", "/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_20", "/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_22_bfp.out25_22", "/transformer_blocks.2/Add_8_output_0.out10_54", "/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_12_bfp.out15_12", "/transformer_blocks.2/Add_4_output_0.out10_52", "/Add_2_output_0.out_35_1_6", "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_13_bfp.out15_13", "/transformer_blocks.3/norm1/Add_4_output_0.out0_0_16_bfp.out1_71", "/transformer_blocks.3/attn2/Reshape_1_output_0.out17_0_22_bfp.out21_7", "/transformer_blocks.3/attn2/Reshape_output_0.out17_0_23_bfp.out21_6", "/transformer_blocks.3/attn2/to_v/Add_output_0.out17_3_25_bfp.out25_23", "/transformer_blocks.3/attn2/Reshape_3_output_0.out20_3_bfp.out27_0_7", "/transformer_blocks.3/attn2/to_out.0/Add_output_0.out17_3_26_bfp.out25_24", "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_17_bfp.out1_70", "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_15_bfp.out1_73", "/transformer_blocks.3/attn/Concat_6_output_0_16_3.out18_1_3_bfp.out19_6", "/transformer_blocks.3/attn/Concat_7_16_3.out18_1_3_bfp.out19_7", "/transformer_blocks.3/attn/Concat_8_3d.out18_1_3_bfp.out23_3", "/transformer_blocks.3/attn/Reshape_6_output_0.out18_1_3_bfp.out27_0_6", "/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7", "/transformer_blocks.3/Add_5_output_0.out10_72", "/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6", "/transformer_blocks.3/Add_output_0.out10_69", "/transformer_blocks.3/Add_1_output_0.out10_70", "/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14", "/transformer_blocks.3/Add_3_output_0.out0_0_18_bfp.out1_72", "/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_25", "/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_27_bfp.out25_27", "/transformer_blocks.3/Add_4_output_0.out10_71", "/Add_3_output_0.out_35_1_7", "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_17_bfp.out15_17", "/transformer_blocks.4/norm1/Add_4_output_0.out0_0_21_bfp.out1_76", "/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15", "/transformer_blocks.3/Add_7_output_0.out0_0_19_bfp.out1_74", "/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_26", "/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_28_bfp.out25_28", "/transformer_blocks.3/Add_8_output_0.out10_73", "/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_16_bfp.out15_16", "/transformer_blocks.4/attn2/Reshape_1_output_0.out17_0_28_bfp.out21_9", "/transformer_blocks.4/attn2/Reshape_output_0.out17_0_29_bfp.out21_8", "/transformer_blocks.4/attn2/to_v/Add_output_0.out17_3_31_bfp.out25_29", "/transformer_blocks.4/attn2/Reshape_3_output_0.out20_4_bfp.out27_0_9", "/transformer_blocks.4/attn2/to_out.0/Add_output_0.out17_3_32_bfp.out25_30", "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_22_bfp.out1_75", "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_20_bfp.out1_78", "/transformer_blocks.4/attn/Concat_6_output_0_16_4.out18_1_4_bfp.out19_8", "/transformer_blocks.4/attn/Concat_7_16_4.out18_1_4_bfp.out19_9", "/transformer_blocks.4/attn/Concat_8_3d.out18_1_4_bfp.out23_4", "/transformer_blocks.4/attn/Reshape_6_output_0.out18_1_4_bfp.out27_0_8", "/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9", "/transformer_blocks.4/Add_5_output_0.out10_77", "/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8", "/transformer_blocks.4/Add_output_0.out10_74", "/transformer_blocks.4/Add_1_output_0.out10_75", "/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18", "/transformer_blocks.4/Add_3_output_0.out0_0_23_bfp.out1_77", "/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_31", "/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_33_bfp.out25_33", "/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19", "/transformer_blocks.4/Add_7_output_0.out0_0_24_bfp.out1_79", "/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_32", "/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_34_bfp.out25_34", "/transformer_blocks.4/Add_8_output_0.out10_78", "/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_20_bfp.out15_20", "/transformer_blocks.4/Add_4_output_0.out10_76", "/Add_4_output_0.out_35_1_8", "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_21_bfp.out15_21", "/transformer_blocks.5/norm1/Add_4_output_0.out0_0_26_bfp.out1_81", "/transformer_blocks.5/attn2/Reshape_1_output_0.out17_0_34_bfp.out21_11", "/transformer_blocks.5/attn2/Reshape_output_0.out17_0_35_bfp.out21_10", "/transformer_blocks.5/attn2/to_v/Add_output_0.out17_3_37_bfp.out25_35", "/transformer_blocks.5/attn2/Reshape_3_output_0.out20_5_bfp.out27_0_11", "/transformer_blocks.5/attn2/to_out.0/Add_output_0.out17_3_38_bfp.out25_36", "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_27_bfp.out1_80", "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_25_bfp.out1_83", "/transformer_blocks.5/attn/Concat_6_output_0_16_5.out18_1_5_bfp.out19_10", "/transformer_blocks.5/attn/Concat_7_16_5.out18_1_5_bfp.out19_11", "/transformer_blocks.5/attn/Concat_8_3d.out18_1_5_bfp.out23_5", "/transformer_blocks.5/attn/Reshape_6_output_0.out18_1_5_bfp.out27_0_10", "/transformer_blocks.5/attn/to_add_out/Add_output_0.out6_1_11_bfp.out7_11", "/transformer_blocks.5/Add_5_output_0.out10_82", "/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10", "/transformer_blocks.5/Add_output_0.out10_79", "/transformer_blocks.5/Add_1_output_0.out10_80", "/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22", "/transformer_blocks.5/Add_3_output_0.out0_0_28_bfp.out1_82", "/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_37", "/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_39_bfp.out25_39", "/transformer_blocks.5/Add_4_output_0.out10_81", "/Add_5_output_0.out_35_1_9", "/transformer_blocks.6/norm1/norm/LayerNormalization_output_0.out14_25_bfp.out15_25", "/transformer_blocks.6/norm1/Add_4_output_0.out0_0_31_bfp.out1_86", "/transformer_blocks.5/norm2_context/LayerNormalization_output_0.out14_23_bfp.out15_23", "/transformer_blocks.5/Add_7_output_0.out0_0_29_bfp.out1_84", "/transformer_blocks.5/ff_context/net.0/Mul_5_output_0.out17_2_11_bfp.out25_38", "/transformer_blocks.5/ff_context/net.2/Add_output_0.out17_3_40_bfp.out25_40", "/transformer_blocks.5/Add_8_output_0.out10_83", "/transformer_blocks.6/norm1_context/norm/LayerNormalization_output_0.out14_24_bfp.out15_24", "/transformer_blocks.6/attn2/Reshape_1_output_0.out17_0_40_bfp.out21_13", "/transformer_blocks.6/attn2/Reshape_output_0.out17_0_41_bfp.out21_12", "/transformer_blocks.6/attn2/to_v/Add_output_0.out17_3_43_bfp.out25_41", "/transformer_blocks.6/attn2/Reshape_3_output_0.out20_6_bfp.out27_0_13", "/transformer_blocks.6/attn2/to_out.0/Add_output_0.out17_3_44_bfp.out25_42", "/transformer_blocks.6/norm1/Add_2_output_0.out0_0_32_bfp.out1_85", "/transformer_blocks.6/norm1_context/Add_2_output_0.out0_0_30_bfp.out1_88", "/transformer_blocks.6/attn/Concat_6_output_0_16_6.out18_1_6_bfp.out19_12", "/transformer_blocks.6/attn/Concat_7_16_6.out18_1_6_bfp.out19_13", "/transformer_blocks.6/attn/Concat_8_3d.out18_1_6_bfp.out23_6", "/transformer_blocks.6/attn/Reshape_6_output_0.out18_1_6_bfp.out27_0_12", "/transformer_blocks.6/attn/to_add_out/Add_output_0.out6_1_13_bfp.out7_13", "/transformer_blocks.6/Add_5_output_0.out10_87", "/transformer_blocks.6/attn/to_out.0/Add_output_0.out6_1_12_bfp.out7_12", "/transformer_blocks.6/Add_output_0.out10_84", "/transformer_blocks.6/Add_1_output_0.out10_85", "/transformer_blocks.6/norm2/LayerNormalization_output_0.out14_26_bfp.out15_26", "/transformer_blocks.6/Add_3_output_0.out0_0_33_bfp.out1_87", "/transformer_blocks.6/ff/net.0/Mul_5_output_0.out17_2_12_bfp.out25_43", "/transformer_blocks.6/ff/net.2/Add_output_0.out17_3_45_bfp.out25_45", "/transformer_blocks.6/norm2_context/LayerNormalization_output_0.out14_27_bfp.out15_27", "/transformer_blocks.6/Add_7_output_0.out0_0_34_bfp.out1_89", "/transformer_blocks.6/ff_context/net.0/Mul_5_output_0.out17_2_13_bfp.out25_44", "/transformer_blocks.6/ff_context/net.2/Add_output_0.out17_3_46_bfp.out25_46", "/transformer_blocks.6/Add_8_output_0.out10_88", "/transformer_blocks.7/norm1_context/norm/LayerNormalization_output_0.out14_28_bfp.out15_28", "/transformer_blocks.6/Add_4_output_0.out10_86", "/Add_6_output_0.out_35_1_10", "/transformer_blocks.7/norm1/norm/LayerNormalization_output_0.out14_29_bfp.out15_29", "/transformer_blocks.7/norm1/Add_4_output_0.out0_0_36_bfp.out1_91", "/transformer_blocks.7/attn2/Reshape_1_output_0.out17_0_46_bfp.out21_15", "/transformer_blocks.7/attn2/Reshape_output_0.out17_0_47_bfp.out21_14", "/transformer_blocks.7/attn2/to_v/Add_output_0.out17_3_49_bfp.out25_47", "/transformer_blocks.7/attn2/Reshape_3_output_0.out20_7_bfp.out27_0_15", "/transformer_blocks.7/attn2/to_out.0/Add_output_0.out17_3_50_bfp.out25_48", "/transformer_blocks.7/norm1/Add_2_output_0.out0_0_37_bfp.out1_90", "/transformer_blocks.7/norm1_context/Add_2_output_0.out0_0_35_bfp.out1_93", "/transformer_blocks.7/attn/Concat_6_output_0_16_7.out18_1_7_bfp.out19_14", "/transformer_blocks.7/attn/Concat_7_16_7.out18_1_7_bfp.out19_15", "/transformer_blocks.7/attn/Concat_8_3d.out18_1_7_bfp.out23_7", "/transformer_blocks.7/attn/Reshape_6_output_0.out18_1_7_bfp.out27_0_14", "/transformer_blocks.7/attn/to_add_out/Add_output_0.out6_1_15_bfp.out7_15", "/transformer_blocks.7/Add_5_output_0.out10_92", "/transformer_blocks.7/attn/to_out.0/Add_output_0.out6_1_14_bfp.out7_14", "/transformer_blocks.7/Add_output_0.out10_89", "/transformer_blocks.7/Add_1_output_0.out10_90", "/transformer_blocks.7/norm2/LayerNormalization_output_0.out14_30_bfp.out15_30", "/transformer_blocks.7/Add_3_output_0.out0_0_38_bfp.out1_92", "/transformer_blocks.7/ff/net.0/Mul_5_output_0.out17_2_14_bfp.out25_49", "/transformer_blocks.7/ff/net.2/Add_output_0.out17_3_51_bfp.out25_51", "/transformer_blocks.7/Add_4_output_0.out10_91", "/Add_7_output_0.out_35_1_11", "/transformer_blocks.8/norm1/norm/LayerNormalization_output_0.out14_33_bfp.out15_33", "/transformer_blocks.8/norm1/Add_4_output_0.out0_0_41_bfp.out1_96", "/transformer_blocks.7/norm2_context/LayerNormalization_output_0.out14_31_bfp.out15_31", "/transformer_blocks.7/Add_7_output_0.out0_0_39_bfp.out1_94", "/transformer_blocks.7/ff_context/net.0/Mul_5_output_0.out17_2_15_bfp.out25_50", "/transformer_blocks.7/ff_context/net.2/Add_output_0.out17_3_52_bfp.out25_52", "/transformer_blocks.7/Add_8_output_0.out10_93", "/transformer_blocks.8/norm1_context/norm/LayerNormalization_output_0.out14_32_bfp.out15_32", "/transformer_blocks.8/attn2/Reshape_1_output_0.out17_0_52_bfp.out21_17", "/transformer_blocks.8/attn2/Reshape_output_0.out17_0_53_bfp.out21_16", "/transformer_blocks.8/attn2/to_v/Add_output_0.out17_3_55_bfp.out25_53", "/transformer_blocks.8/attn2/Reshape_3_output_0.out20_8_bfp.out27_0_17", "/transformer_blocks.8/attn2/to_out.0/Add_output_0.out17_3_56_bfp.out25_54", "/transformer_blocks.8/norm1/Add_2_output_0.out0_0_42_bfp.out1_95", "/transformer_blocks.8/norm1_context/Add_2_output_0.out0_0_40_bfp.out1_98", "/transformer_blocks.8/attn/Concat_6_output_0_16_8.out18_1_8_bfp.out19_16", "/transformer_blocks.8/attn/Concat_7_16_8.out18_1_8_bfp.out19_17", "/transformer_blocks.8/attn/Concat_8_3d.out18_1_8_bfp.out23_8", "/transformer_blocks.8/attn/Reshape_6_output_0.out18_1_8_bfp.out27_0_16", "/transformer_blocks.8/attn/to_add_out/Add_output_0.out6_1_17_bfp.out7_17", "/transformer_blocks.8/Add_5_output_0.out10_97", "/transformer_blocks.8/attn/to_out.0/Add_output_0.out6_1_16_bfp.out7_16", "/transformer_blocks.8/Add_output_0.out10_94", "/transformer_blocks.8/Add_1_output_0.out10_95", "/transformer_blocks.8/norm2/LayerNormalization_output_0.out14_34_bfp.out15_34", "/transformer_blocks.8/Add_3_output_0.out0_0_43_bfp.out1_97", "/transformer_blocks.8/ff/net.0/Mul_5_output_0.out17_2_16_bfp.out25_55", "/transformer_blocks.8/ff/net.2/Add_output_0.out17_3_57_bfp.out25_57", "/transformer_blocks.8/norm2_context/LayerNormalization_output_0.out14_35_bfp.out15_35", "/transformer_blocks.8/Add_7_output_0.out0_0_44_bfp.out1_99", "/transformer_blocks.8/ff_context/net.0/Mul_5_output_0.out17_2_17_bfp.out25_56", "/transformer_blocks.8/ff_context/net.2/Add_output_0.out17_3_58_bfp.out25_58", "/transformer_blocks.8/Add_8_output_0.out10_98", "/transformer_blocks.9/norm1_context/norm/LayerNormalization_output_0.out14_36_bfp.out15_36", "/transformer_blocks.8/Add_4_output_0.out10_96", "/Add_8_output_0.out_35_1_12", "/transformer_blocks.9/norm1/norm/LayerNormalization_output_0.out14_37_bfp.out15_37", "/transformer_blocks.9/norm1/Add_4_output_0.out0_0_46_bfp.out1_101", "/transformer_blocks.9/attn2/Reshape_1_output_0.out17_0_58_bfp.out21_19", "/transformer_blocks.9/attn2/Reshape_output_0.out17_0_59_bfp.out21_18", "/transformer_blocks.9/attn2/to_v/Add_output_0.out17_3_61_bfp.out25_59", "/transformer_blocks.9/attn2/Reshape_3_output_0.out20_9_bfp.out27_0_19", "/transformer_blocks.9/attn2/to_out.0/Add_output_0.out17_3_62_bfp.out25_60", "/transformer_blocks.9/norm1/Add_2_output_0.out0_0_47_bfp.out1_100", "/transformer_blocks.9/norm1_context/Add_2_output_0.out0_0_45_bfp.out1_103", "/transformer_blocks.9/attn/Concat_6_output_0_16_9.out18_1_9_bfp.out19_18", "/transformer_blocks.9/attn/Concat_7_16_9.out18_1_9_bfp.out19_19", "/transformer_blocks.9/attn/Concat_8_3d.out18_1_9_bfp.out23_9", "/transformer_blocks.9/attn/Reshape_6_output_0.out18_1_9_bfp.out27_0_18", "/transformer_blocks.9/attn/to_add_out/Add_output_0.out6_1_19_bfp.out7_19", "/transformer_blocks.9/Add_5_output_0.out10_102", "/transformer_blocks.9/attn/to_out.0/Add_output_0.out6_1_18_bfp.out7_18", "/transformer_blocks.9/Add_output_0.out10_99", "/transformer_blocks.9/Add_1_output_0.out10_100", "/transformer_blocks.9/norm2/LayerNormalization_output_0.out14_38_bfp.out15_38", "/transformer_blocks.9/Add_3_output_0.out0_0_48_bfp.out1_102", "/transformer_blocks.9/ff/net.0/Mul_5_output_0.out17_2_18_bfp.out25_61", "/transformer_blocks.9/ff/net.2/Add_output_0.out17_3_63_bfp.out25_63", "/transformer_blocks.9/Add_4_output_0.out10_101", "/Add_9_output_0.out_35_1_13", "/transformer_blocks.10/norm1/norm/LayerNormalization_output_0.out14_40_bfp.out15_40", "/transformer_blocks.10/norm1/Add_2_output_0.out0_0_51_bfp.out1_7", "/transformer_blocks.10/norm1/Add_4_output_0.out0_0_50_bfp.out1_8", "/transformer_blocks.9/norm2_context/LayerNormalization_output_0.out14_39_bfp.out15_39", "/transformer_blocks.9/Add_7_output_0.out0_0_49_bfp.out1_104", "/transformer_blocks.9/ff_context/net.0/Mul_5_output_0.out17_2_19_bfp.out25_62", "/transformer_blocks.9/ff_context/net.2/Add_output_0.out17_3_64_bfp.out25_64", "/transformer_blocks.9/Add_8_output_0.out10_103", "/transformer_blocks.10/norm1_context/norm/LayerNormalization_output_0.out14_41_bfp.out15_41", "/transformer_blocks.10/norm1_context/Add_2_output_0.out0_0_52_bfp.out1_10", "/transformer_blocks.10/attn/Concat_6_output_0_16_10.out18_1_10_bfp.out19_20", "/transformer_blocks.10/attn/Concat_7_16_10.out18_1_10_bfp.out19_21", "/transformer_blocks.10/attn/Concat_8_3d.out18_1_10_bfp.out23_10", "/transformer_blocks.10/attn/Reshape_6_output_0.out18_1_10_bfp.out27_0_20", "/transformer_blocks.10/attn/to_out.0/Add_output_0.out6_1_20_bfp.out7_20", "/transformer_blocks.10/Add_output_0.out10_7", "/transformer_blocks.10/attn/to_add_out/Add_output_0.out6_1_21_bfp.out7_21", "/transformer_blocks.10/attn2/Reshape_1_output_0.out17_0_64_bfp.out21_21", "/transformer_blocks.10/Add_5_output_0.out10_10", "/transformer_blocks.10/norm2_context/LayerNormalization_output_0.out14_43_bfp.out15_43", "/transformer_blocks.10/Add_7_output_0.out0_0_54_bfp.out1_11", "/transformer_blocks.10/ff_context/net.0/Mul_5_output_0.out17_2_21_bfp.out25_68", "/transformer_blocks.10/ff_context/net.2/Add_output_0.out17_3_70_bfp.out25_70", "/transformer_blocks.10/Add_8_output_0.out10_11", "/transformer_blocks.11/norm1_context/norm/LayerNormalization_output_0.out14_44_bfp.out15_44", "/transformer_blocks.10/attn2/Reshape_output_0.out17_0_65_bfp.out21_20", "/transformer_blocks.10/attn2/to_v/Add_output_0.out17_3_67_bfp.out25_65", "/transformer_blocks.10/attn2/Reshape_3_output_0.out20_10_bfp.out27_0_21", "/transformer_blocks.10/attn2/to_out.0/Add_output_0.out17_3_68_bfp.out25_66", "/transformer_blocks.10/Add_1_output_0.out10_8", "/transformer_blocks.10/norm2/LayerNormalization_output_0.out14_42_bfp.out15_42", "/transformer_blocks.10/Add_3_output_0.out0_0_53_bfp.out1_9", "/transformer_blocks.10/ff/net.0/Mul_5_output_0.out17_2_20_bfp.out25_67", "/transformer_blocks.10/ff/net.2/Add_output_0.out17_3_69_bfp.out25_69", "/transformer_blocks.10/Add_4_output_0.out10_9", "/Add_10_output_0.out_35_1_14", "/transformer_blocks.11/norm1/norm/LayerNormalization_output_0.out14_45_bfp.out15_45", "/transformer_blocks.11/norm1/Add_4_output_0.out0_0_56_bfp.out1_13", "/transformer_blocks.11/norm1/Add_2_output_0.out0_0_57_bfp.out1_12", "/transformer_blocks.11/norm1_context/Add_2_output_0.out0_0_55_bfp.out1_15", "/transformer_blocks.11/attn/Concat_6_output_0_16_11.out18_1_11_bfp.out19_22", "/transformer_blocks.11/attn/Concat_7_16_11.out18_1_11_bfp.out19_23", "/transformer_blocks.11/attn/Concat_8_3d.out18_1_11_bfp.out23_11", "/transformer_blocks.11/attn/Reshape_6_output_0.out18_1_11_bfp.out27_0_22", "/transformer_blocks.11/attn/to_out.0/Add_output_0.out6_1_22_bfp.out7_22", "/transformer_blocks.11/Add_output_0.out10_12", "/transformer_blocks.11/attn/to_add_out/Add_output_0.out6_1_23_bfp.out7_23", "/transformer_blocks.11/attn2/Reshape_1_output_0.out17_0_70_bfp.out21_23", "/transformer_blocks.11/Add_5_output_0.out10_15", "/transformer_blocks.11/norm2_context/LayerNormalization_output_0.out14_47_bfp.out15_47", "/transformer_blocks.11/Add_7_output_0.out0_0_59_bfp.out1_16", "/transformer_blocks.11/ff_context/net.0/Mul_5_output_0.out17_2_23_bfp.out25_74", "/transformer_blocks.11/ff_context/net.2/Add_output_0.out17_3_76_bfp.out25_76", "/transformer_blocks.11/Add_8_output_0.out10_16", "/transformer_blocks.12/norm1_context/norm/LayerNormalization_output_0.out14_48_bfp.out15_48", "/transformer_blocks.11/attn2/Reshape_output_0.out17_0_71_bfp.out21_22", "/transformer_blocks.11/attn2/to_v/Add_output_0.out17_3_73_bfp.out25_71", "/transformer_blocks.11/attn2/Reshape_3_output_0.out20_11_bfp.out27_0_23", "/transformer_blocks.11/attn2/to_out.0/Add_output_0.out17_3_74_bfp.out25_72", "/transformer_blocks.11/Add_1_output_0.out10_13", "/transformer_blocks.11/norm2/LayerNormalization_output_0.out14_46_bfp.out15_46", "/transformer_blocks.11/Add_3_output_0.out0_0_58_bfp.out1_14", "/transformer_blocks.11/ff/net.0/Mul_5_output_0.out17_2_22_bfp.out25_73", "/transformer_blocks.11/ff/net.2/Add_output_0.out17_3_75_bfp.out25_75", "/transformer_blocks.11/Add_4_output_0.out10_14", "/Add_11_output_0.out_35_1_15", "/transformer_blocks.12/norm1/norm/LayerNormalization_output_0.out14_49_bfp.out15_49", "/transformer_blocks.12/norm1/Add_4_output_0.out0_0_61_bfp.out1_18", "/transformer_blocks.12/attn2/Reshape_1_output_0.out17_0_76_bfp.out21_25", "/transformer_blocks.12/norm1/Add_2_output_0.out0_0_62_bfp.out1_17", "/transformer_blocks.12/norm1_context/Add_2_output_0.out0_0_60_bfp.out1_20", "/transformer_blocks.12/attn/Concat_6_output_0_16_12.out18_1_12_bfp.out19_24", "/transformer_blocks.12/attn/Concat_7_16_12.out18_1_12_bfp.out19_25", "/transformer_blocks.12/attn/Concat_8_3d.out18_1_12_bfp.out23_12", "/transformer_blocks.12/attn/Reshape_6_output_0.out18_1_12_bfp.out27_0_24", "/transformer_blocks.12/attn/to_out.0/Add_output_0.out6_1_24_bfp.out7_24", "/transformer_blocks.12/Add_output_0.out10_17", "/transformer_blocks.12/attn/to_add_out/Add_output_0.out6_1_25_bfp.out7_25", "/transformer_blocks.12/Add_5_output_0.out10_20", "/transformer_blocks.12/norm2_context/LayerNormalization_output_0.out14_51_bfp.out15_51", "/transformer_blocks.12/Add_7_output_0.out0_0_64_bfp.out1_21", "/transformer_blocks.12/ff_context/net.0/Mul_5_output_0.out17_2_25_bfp.out25_80", "/transformer_blocks.12/ff_context/net.2/Add_output_0.out17_3_82_bfp.out25_82", "/transformer_blocks.12/Add_8_output_0.out10_21", "/transformer_blocks.13/norm1_context/norm/LayerNormalization_output_0.out14_52_bfp.out15_52", "/transformer_blocks.12/attn2/Reshape_output_0.out17_0_77_bfp.out21_24", "/transformer_blocks.12/attn2/to_v/Add_output_0.out17_3_79_bfp.out25_77", "/transformer_blocks.12/attn2/Reshape_3_output_0.out20_12_bfp.out27_0_25", "/transformer_blocks.12/attn2/to_out.0/Add_output_0.out17_3_80_bfp.out25_78", "/transformer_blocks.12/Add_1_output_0.out10_18", "/transformer_blocks.12/norm2/LayerNormalization_output_0.out14_50_bfp.out15_50", "/transformer_blocks.12/Add_3_output_0.out0_0_63_bfp.out1_19", "/transformer_blocks.12/ff/net.0/Mul_5_output_0.out17_2_24_bfp.out25_79", "/transformer_blocks.12/ff/net.2/Add_output_0.out17_3_81_bfp.out25_81", "/transformer_blocks.12/Add_4_output_0.out10_19", "/Add_12_output_0.out_35_1_16", "/transformer_blocks.13/norm1/norm/LayerNormalization_output_0.out14_53_bfp.out15_53", "/transformer_blocks.13/norm1/Add_2_output_0.out0_0_66_bfp.out1_22", "/transformer_blocks.13/norm1_context/Add_2_output_0.out0_0_65_bfp.out1_24", "/transformer_blocks.13/attn/Concat_6_output_0_16_13.out18_1_13_bfp.out19_26", "/transformer_blocks.13/attn/Concat_7_16_13.out18_1_13_bfp.out19_27", "/transformer_blocks.13/attn/Concat_8_3d.out18_1_13_bfp.out23_13", "/transformer_blocks.13/attn/Reshape_6_output_0.out18_1_13_bfp.out27_0_26", "/transformer_blocks.13/attn/to_out.0/Add_output_0.out6_1_26_bfp.out7_26", "/transformer_blocks.13/Add_output_0.out10_22", "/transformer_blocks.13/norm2/LayerNormalization_output_0.out14_54_bfp.out15_54", "/transformer_blocks.13/Add_2_output_0.out0_0_67_bfp.out1_23", "/transformer_blocks.13/ff/net.0/Mul_5_output_0.out17_2_26_bfp.out25_83", "/transformer_blocks.13/ff/net.2/Add_output_0.out17_3_85_bfp.out25_85", "/transformer_blocks.13/Add_3_output_0.out10_23", "/Add_13_output_0.out_35_1_17", "/transformer_blocks.13/attn/to_add_out/Add_output_0.out6_1_27_bfp.out7_27", "/transformer_blocks.13/Add_4_output_0.out10_24", "/transformer_blocks.13/norm2_context/LayerNormalization_output_0.out14_55_bfp.out15_55", "/transformer_blocks.13/Add_6_output_0.out0_0_68_bfp.out1_25", "/transformer_blocks.13/ff_context/net.0/Mul_5_output_0.out17_2_27_bfp.out25_84", "/transformer_blocks.13/ff_context/net.2/Add_output_0.out17_3_86_bfp.out25_86", "/transformer_blocks.13/Add_7_output_0.out10_25", "/transformer_blocks.14/norm1/norm/LayerNormalization_output_0.out14_57_bfp.out15_57", "/transformer_blocks.14/norm1/Add_2_output_0.out0_0_70_bfp.out1_26", "/transformer_blocks.14/norm1_context/norm/LayerNormalization_output_0.out14_56_bfp.out15_56", "/transformer_blocks.14/norm1_context/Add_2_output_0.out0_0_69_bfp.out1_28", "/transformer_blocks.14/attn/Concat_6_output_0_16_14.out18_1_14_bfp.out19_28", "/transformer_blocks.14/attn/Concat_7_16_14.out18_1_14_bfp.out19_29", "/transformer_blocks.14/attn/Concat_8_3d.out18_1_14_bfp.out23_14", "/transformer_blocks.14/attn/Reshape_6_output_0.out18_1_14_bfp.out27_0_27", "/transformer_blocks.14/attn/to_out.0/Add_output_0.out6_1_28_bfp.out7_28", "/transformer_blocks.14/Add_output_0.out10_26", "/transformer_blocks.14/norm2/LayerNormalization_output_0.out14_58_bfp.out15_58", "/transformer_blocks.14/Add_2_output_0.out0_0_71_bfp.out1_27", "/transformer_blocks.14/ff/net.0/Mul_5_output_0.out17_2_28_bfp.out25_87", "/transformer_blocks.14/ff/net.2/Add_output_0.out17_3_89_bfp.out25_89", "/transformer_blocks.14/Add_3_output_0.out10_27", "/transformer_blocks.14/attn/to_add_out/Add_output_0.out6_1_29_bfp.out7_29", "/transformer_blocks.14/Add_4_output_0.out10_28", "/transformer_blocks.14/norm2_context/LayerNormalization_output_0.out14_59_bfp.out15_59", "/transformer_blocks.14/Add_6_output_0.out0_0_72_bfp.out1_29", "/transformer_blocks.14/ff_context/net.0/Mul_5_output_0.out17_2_29_bfp.out25_88", "/transformer_blocks.14/ff_context/net.2/Add_output_0.out17_3_90_bfp.out25_90", "/transformer_blocks.14/Add_7_output_0.out10_29", "/transformer_blocks.15/norm1_context/norm/LayerNormalization_output_0.out14_60_bfp.out15_60", "/Add_14_output_0.out_35_1_18", "/transformer_blocks.15/norm1/norm/LayerNormalization_output_0.out14_61_bfp.out15_61", "/transformer_blocks.15/norm1/Add_2_output_0.out0_0_74_bfp.out1_30", "/transformer_blocks.15/norm1_context/Add_2_output_0.out0_0_73_bfp.out1_32", "/transformer_blocks.15/attn/Concat_6_output_0_16_15.out18_1_15_bfp.out19_30", "/transformer_blocks.15/attn/Concat_7_16_15.out18_1_15_bfp.out19_31", "/transformer_blocks.15/attn/Concat_8_3d.out18_1_15_bfp.out23_15", "/transformer_blocks.15/attn/Reshape_6_output_0.out18_1_15_bfp.out27_0_28", "/transformer_blocks.15/attn/to_out.0/Add_output_0.out6_1_30_bfp.out7_30", "/transformer_blocks.15/Add_output_0.out10_30", "/transformer_blocks.15/norm2/LayerNormalization_output_0.out14_62_bfp.out15_62", "/transformer_blocks.15/Add_2_output_0.out0_0_75_bfp.out1_31", "/transformer_blocks.15/ff/net.0/Mul_5_output_0.out17_2_30_bfp.out25_91", "/transformer_blocks.15/ff/net.2/Add_output_0.out17_3_93_bfp.out25_93", "/transformer_blocks.15/Add_3_output_0.out10_31", "/Add_15_output_0.out_35_1_19", "/transformer_blocks.15/attn/to_add_out/Add_output_0.out6_1_31_bfp.out7_31", "/transformer_blocks.15/Add_4_output_0.out10_32", "/transformer_blocks.15/norm2_context/LayerNormalization_output_0.out14_63_bfp.out15_63", "/transformer_blocks.15/Add_6_output_0.out0_0_76_bfp.out1_33", "/transformer_blocks.15/ff_context/net.0/Mul_5_output_0.out17_2_31_bfp.out25_92", "/transformer_blocks.15/ff_context/net.2/Add_output_0.out17_3_94_bfp.out25_94", "/transformer_blocks.15/Add_7_output_0.out10_33", "/transformer_blocks.16/norm1/norm/LayerNormalization_output_0.out14_65_bfp.out15_65", "/transformer_blocks.16/norm1/Add_2_output_0.out0_0_78_bfp.out1_34", "/transformer_blocks.16/norm1_context/norm/LayerNormalization_output_0.out14_64_bfp.out15_64", "/transformer_blocks.16/norm1_context/Add_2_output_0.out0_0_77_bfp.out1_36", "/transformer_blocks.16/attn/Concat_6_output_0_16_16.out18_1_16_bfp.out19_32", "/transformer_blocks.16/attn/Concat_7_16_16.out18_1_16_bfp.out19_33", "/transformer_blocks.16/attn/Concat_8_3d.out18_1_16_bfp.out23_16", "/transformer_blocks.16/attn/Reshape_6_output_0.out18_1_16_bfp.out27_0_29", "/transformer_blocks.16/attn/to_out.0/Add_output_0.out6_1_32_bfp.out7_32", "/transformer_blocks.16/Add_output_0.out10_34", "/transformer_blocks.16/norm2/LayerNormalization_output_0.out14_66_bfp.out15_66", "/transformer_blocks.16/Add_2_output_0.out0_0_79_bfp.out1_35", "/transformer_blocks.16/ff/net.0/Mul_5_output_0.out17_2_32_bfp.out25_95", "/transformer_blocks.16/ff/net.2/Add_output_0.out17_3_97_bfp.out25_97", "/transformer_blocks.16/Add_3_output_0.out10_35", "/transformer_blocks.16/attn/to_add_out/Add_output_0.out6_1_33_bfp.out7_33", "/transformer_blocks.16/Add_4_output_0.out10_36", "/transformer_blocks.16/norm2_context/LayerNormalization_output_0.out14_67_bfp.out15_67", "/transformer_blocks.16/Add_6_output_0.out0_0_80_bfp.out1_37", "/transformer_blocks.16/ff_context/net.0/Mul_5_output_0.out17_2_33_bfp.out25_96", "/transformer_blocks.16/ff_context/net.2/Add_output_0.out17_3_98_bfp.out25_98", "/transformer_blocks.16/Add_7_output_0.out10_37", "/transformer_blocks.17/norm1_context/norm/LayerNormalization_output_0.out14_68_bfp.out15_68", "/Add_16_output_0.out_35_1_20", "/transformer_blocks.17/norm1/norm/LayerNormalization_output_0.out14_69_bfp.out15_69", "/transformer_blocks.17/norm1/Add_2_output_0.out0_0_82_bfp.out1_38", "/transformer_blocks.17/norm1_context/Add_2_output_0.out0_0_81_bfp.out1_40", "/transformer_blocks.17/attn/Concat_6_output_0_16_17.out18_1_17_bfp.out19_34", "/transformer_blocks.17/attn/Concat_7_16_17.out18_1_17_bfp.out19_35", "/transformer_blocks.17/attn/Concat_8_3d.out18_1_17_bfp.out23_17", "/transformer_blocks.17/attn/Reshape_6_output_0.out18_1_17_bfp.out27_0_30", "/transformer_blocks.17/attn/to_out.0/Add_output_0.out6_1_34_bfp.out7_34", "/transformer_blocks.17/Add_output_0.out10_38", "/transformer_blocks.17/norm2/LayerNormalization_output_0.out14_70_bfp.out15_70", "/transformer_blocks.17/Add_2_output_0.out0_0_83_bfp.out1_39", "/transformer_blocks.17/ff/net.0/Mul_5_output_0.out17_2_34_bfp.out25_99", "/transformer_blocks.17/ff/net.2/Add_output_0.out17_3_101_bfp.out25_101", "/transformer_blocks.17/Add_3_output_0.out10_39", "/Add_17_output_0.out_35_1_21", "/transformer_blocks.17/attn/to_add_out/Add_output_0.out6_1_35_bfp.out7_35", "/transformer_blocks.17/Add_4_output_0.out10_40", "/transformer_blocks.17/norm2_context/LayerNormalization_output_0.out14_71_bfp.out15_71", "/transformer_blocks.17/Add_6_output_0.out0_0_84_bfp.out1_41", "/transformer_blocks.17/ff_context/net.0/Mul_5_output_0.out17_2_35_bfp.out25_100", "/transformer_blocks.17/ff_context/net.2/Add_output_0.out17_3_102_bfp.out25_102", "/transformer_blocks.17/Add_7_output_0.out10_41", "/transformer_blocks.18/norm1/norm/LayerNormalization_output_0.out14_73_bfp.out15_73", "/transformer_blocks.18/norm1/Add_2_output_0.out0_0_86_bfp.out1_42", "/transformer_blocks.18/norm1_context/norm/LayerNormalization_output_0.out14_72_bfp.out15_72", "/transformer_blocks.18/norm1_context/Add_2_output_0.out0_0_85_bfp.out1_44", "/transformer_blocks.18/attn/Concat_6_output_0_16_18.out18_1_18_bfp.out19_36", "/transformer_blocks.18/attn/Concat_7_16_18.out18_1_18_bfp.out19_37", "/transformer_blocks.18/attn/Concat_8_3d.out18_1_18_bfp.out23_18", "/transformer_blocks.18/attn/Reshape_6_output_0.out18_1_18_bfp.out27_0_31", "/transformer_blocks.18/attn/to_out.0/Add_output_0.out6_1_36_bfp.out7_36", "/transformer_blocks.18/Add_output_0.out10_42", "/transformer_blocks.18/norm2/LayerNormalization_output_0.out14_74_bfp.out15_74", "/transformer_blocks.18/Add_2_output_0.out0_0_87_bfp.out1_43", "/transformer_blocks.18/ff/net.0/Mul_5_output_0.out17_2_36_bfp.out25_103", "/transformer_blocks.18/ff/net.2/Add_output_0.out17_3_105_bfp.out25_105", "/transformer_blocks.18/Add_3_output_0.out10_43", "/transformer_blocks.18/attn/to_add_out/Add_output_0.out6_1_37_bfp.out7_37", "/transformer_blocks.18/Add_4_output_0.out10_44", "/transformer_blocks.18/norm2_context/LayerNormalization_output_0.out14_75_bfp.out15_75", "/transformer_blocks.18/Add_6_output_0.out0_0_88_bfp.out1_45", "/transformer_blocks.18/ff_context/net.0/Mul_5_output_0.out17_2_37_bfp.out25_104", "/transformer_blocks.18/ff_context/net.2/Add_output_0.out17_3_106_bfp.out25_106", "/transformer_blocks.18/Add_7_output_0.out10_45", "/transformer_blocks.19/norm1_context/norm/LayerNormalization_output_0.out14_76_bfp.out15_76", "/Add_18_output_0.out_35_1_22", "/transformer_blocks.19/norm1/norm/LayerNormalization_output_0.out14_77_bfp.out15_77", "/transformer_blocks.19/norm1/Add_2_output_0.out0_0_90_bfp.out1_46", "/transformer_blocks.19/norm1_context/Add_2_output_0.out0_0_89_bfp.out1_48", "/transformer_blocks.19/attn/Concat_6_output_0_16_19.out18_1_19_bfp.out19_38", "/transformer_blocks.19/attn/Concat_7_16_19.out18_1_19_bfp.out19_39", "/transformer_blocks.19/attn/Concat_8_3d.out18_1_19_bfp.out23_19", "/transformer_blocks.19/attn/Reshape_6_output_0.out18_1_19_bfp.out27_0_32", "/transformer_blocks.19/attn/to_out.0/Add_output_0.out6_1_38_bfp.out7_38", "/transformer_blocks.19/Add_output_0.out10_46", "/transformer_blocks.19/norm2/LayerNormalization_output_0.out14_78_bfp.out15_78", "/transformer_blocks.19/Add_2_output_0.out0_0_91_bfp.out1_47", "/transformer_blocks.19/ff/net.0/Mul_5_output_0.out17_2_38_bfp.out25_107", "/transformer_blocks.19/ff/net.2/Add_output_0.out17_3_109_bfp.out25_109", "/transformer_blocks.19/Add_3_output_0.out10_47", "/Add_19_output_0.out_35_1_23", "/transformer_blocks.19/attn/to_add_out/Add_output_0.out6_1_39_bfp.out7_39", "/transformer_blocks.19/Add_4_output_0.out10_48", "/transformer_blocks.19/norm2_context/LayerNormalization_output_0.out14_79_bfp.out15_79", "/transformer_blocks.19/Add_6_output_0.out0_0_92_bfp.out1_49", "/transformer_blocks.19/ff_context/net.0/Mul_5_output_0.out17_2_39_bfp.out25_108", "/transformer_blocks.19/ff_context/net.2/Add_output_0.out17_3_110_bfp.out25_110", "/transformer_blocks.19/Add_7_output_0.out10_49", "/transformer_blocks.20/norm1/norm/LayerNormalization_output_0.out14_81_bfp.out15_81", "/transformer_blocks.20/norm1_context/norm/LayerNormalization_output_0.out14_80_bfp.out15_80", "/transformer_blocks.20/norm1/Add_2_output_0.out0_0_94_bfp.out1_55", "/transformer_blocks.20/norm1_context/Add_2_output_0.out0_0_93_bfp.out1_57", "/transformer_blocks.20/attn/Concat_6_output_0_16_20.out18_1_20_bfp.out19_40", "/transformer_blocks.20/attn/Concat_7_16_20.out18_1_20_bfp.out19_41", "/transformer_blocks.20/attn/Concat_8_3d.out18_1_20_bfp.out23_20", "/transformer_blocks.20/attn/Reshape_6_output_0.out18_1_20_bfp.out27_0_33", "/transformer_blocks.20/attn/to_add_out/Add_output_0.out6_1_41_bfp.out7_41", "/transformer_blocks.20/Add_4_output_0.out10_57", "/transformer_blocks.20/attn/to_out.0/Add_output_0.out6_1_40_bfp.out7_40", "/transformer_blocks.20/Add_output_0.out10_55", "/transformer_blocks.20/norm2/LayerNormalization_output_0.out14_82_bfp.out15_82", "/transformer_blocks.20/Add_2_output_0.out0_0_95_bfp.out1_56", "/transformer_blocks.20/ff/net.0/Mul_5_output_0.out17_2_40_bfp.out25_111", "/transformer_blocks.20/ff/net.2/Add_output_0.out17_3_113_bfp.out25_113", "/transformer_blocks.20/Add_3_output_0.out10_56", "/transformer_blocks.20/norm2_context/LayerNormalization_output_0.out14_83_bfp.out15_83", "/transformer_blocks.20/Add_6_output_0.out0_0_96_bfp.out1_58", "/transformer_blocks.20/ff_context/net.0/Mul_5_output_0.out17_2_41_bfp.out25_112", "/transformer_blocks.20/ff_context/net.2/Add_output_0.out17_3_114_bfp.out25_114", "/transformer_blocks.20/Add_7_output_0.out10_58", "/transformer_blocks.21/norm1_context/norm/LayerNormalization_output_0.out14_84_bfp.out15_84", "/Add_20_output_0.out_35_1_24", "/transformer_blocks.21/norm1/norm/LayerNormalization_output_0.out14_85_bfp.out15_85", "/transformer_blocks.21/norm1/Add_2_output_0.out0_0_98_bfp.out1_59", "/transformer_blocks.21/norm1_context/Add_2_output_0.out0_0_97_bfp.out1_61", "/transformer_blocks.21/attn/Concat_6_output_0_16_21.out18_1_21_bfp.out19_42", "/transformer_blocks.21/attn/Concat_7_16_21.out18_1_21_bfp.out19_43", "/transformer_blocks.21/attn/Concat_8_3d.out18_1_21_bfp.out23_21", "/transformer_blocks.21/attn/Reshape_6_output_0.out18_1_21_bfp.out27_0_34", "/transformer_blocks.21/attn/to_out.0/Add_output_0.out6_1_42_bfp.out7_42", "/transformer_blocks.21/Add_output_0.out10_59", "/transformer_blocks.21/norm2/LayerNormalization_output_0.out14_86_bfp.out15_86", "/transformer_blocks.21/Add_2_output_0.out0_0_99_bfp.out1_60", "/transformer_blocks.21/ff/net.0/Mul_5_output_0.out17_2_42_bfp.out25_115", "/transformer_blocks.21/ff/net.2/Add_output_0.out17_3_117_bfp.out25_117", "/transformer_blocks.21/Add_3_output_0.out10_60", "/Add_21_output_0.out_35_1_25", "/transformer_blocks.21/attn/to_add_out/Add_output_0.out6_1_43_bfp.out7_43", "/transformer_blocks.21/Add_4_output_0.out10_61", "/transformer_blocks.21/norm2_context/LayerNormalization_output_0.out14_87_bfp.out15_87", "/transformer_blocks.21/Add_6_output_0.out0_0_100_bfp.out1_62", "/transformer_blocks.21/ff_context/net.0/Mul_5_output_0.out17_2_43_bfp.out25_116", "/transformer_blocks.21/ff_context/net.2/Add_output_0.out17_3_118_bfp.out25_118", "/transformer_blocks.21/Add_7_output_0.out10_62", "/transformer_blocks.22/norm1/norm/LayerNormalization_output_0.out14_89_bfp.out15_89", "/transformer_blocks.22/norm1/Add_2_output_0.out0_0_102_bfp.out1_63", "/transformer_blocks.22/norm1_context/norm/LayerNormalization_output_0.out14_88_bfp.out15_88", "/transformer_blocks.22/norm1_context/Add_2_output_0.out0_0_101_bfp.out1_65", "/transformer_blocks.22/attn/Concat_6_output_0_16_22.out18_1_22_bfp.out19_44", "/transformer_blocks.22/attn/Concat_7_16_22.out18_1_22_bfp.out19_45", "/transformer_blocks.22/attn/Concat_8_3d.out18_1_22_bfp.out23_22", "/transformer_blocks.22/attn/Reshape_6_output_0.out18_1_22_bfp.out27_0_35", "/transformer_blocks.22/attn/to_out.0/Add_output_0.out6_1_44_bfp.out7_44", "/transformer_blocks.22/Add_output_0.out10_63", "/transformer_blocks.22/norm2/LayerNormalization_output_0.out14_90_bfp.out15_90", "/transformer_blocks.22/Add_2_output_0.out0_0_103_bfp.out1_64", "/transformer_blocks.22/ff/net.0/Mul_5_output_0.out17_2_44_bfp.out25_119", "/transformer_blocks.22/ff/net.2/Add_output_0.out17_3_121_bfp.out25_121", "/transformer_blocks.22/Add_3_output_0.out10_64", "/transformer_blocks.22/attn/to_add_out/Add_output_0.out6_1_45_bfp.out7_45", "/transformer_blocks.22/Add_4_output_0.out10_65", "/transformer_blocks.22/norm2_context/LayerNormalization_output_0.out14_91_bfp.out15_91", "/transformer_blocks.22/Add_6_output_0.out0_0_104_bfp.out1_66", "/transformer_blocks.22/ff_context/net.0/Mul_5_output_0.out17_2_45_bfp.out25_120", "/transformer_blocks.22/ff_context/net.2/Add_output_0.out17_3_122_bfp.out25_122", "/transformer_blocks.22/Add_7_output_0.out10_66", "/transformer_blocks.23/norm1_context/norm/LayerNormalization_output_0.out14_92_bfp.out15_92", "/Add_22_output_0.out_35_1_26", "/transformer_blocks.23/norm1/norm/LayerNormalization_output_0.out14_93_bfp.out15_93", "/transformer_blocks.23/norm1/Add_2_output_0.out0_0_106_bfp.out1_67", "/transformer_blocks.23/norm1_context/Add_2_output_0.out0_0_105_bfp.out1_69", "/transformer_blocks.23/attn/Concat_6_output_0_16_23.out18_1_23_bfp.out19_46", "/transformer_blocks.23/attn/Concat_7_16_23.out18_1_23_bfp.out19_47", "/transformer_blocks.23/attn/Concat_8_3d.out18_1_23_bfp.out23_23", "/transformer_blocks.23/attn/Reshape_6_output_0.out18_1_23_bfp.out27_0_36", "/transformer_blocks.23/attn/to_out.0/Add_output_0.out6_1_46_bfp.out7_46", "/transformer_blocks.23/Add_output_0.out10_67", "/transformer_blocks.23/norm2/LayerNormalization_output_0.out14_94_bfp.out15_94", "/transformer_blocks.23/Add_2_output_0.out0_0_107_bfp.out1_68", "/transformer_blocks.23/ff/net.0/Mul_5_output_0.out17_2_46_bfp.out25_123", "/transformer_blocks.23/ff/net.2/Add_output_0.out17_3_125_bfp.out25_124", "/transformer_blocks.23/Add_3_output_0.out10_68", "/norm_out/norm/LayerNormalization_output_0.out14_95_bfp.out15_95", "/norm_out/Add_2_output_0.out0_0_108_bfp.out1_105", "/norm_out/Add_2_output_0.out0_0_108" ] }, "const": { "buffer_size": 2557919104, "xrt_arg_id": 3, "packed_tensors": [ "existing_model.pos_embed.proj.weight", "existing_model.time_text_embed.timestep_embedder.linear_1.weight_5_1_0", "/time_text_embed/timestep_embedder/act/Sigmoid.weights2_1", "existing_model.time_text_embed.timestep_embedder.linear_2.weight_5_1_1", "existing_model.time_text_embed.text_embedder.linear_1.weight_5_1_2", "/time_text_embed/text_embedder/act_1/Sigmoid.weights2_0", "existing_model.time_text_embed.text_embedder.linear_2.weight_5_1_3", "/transformer_blocks.0/norm1/silu/Sigmoid.weights2_2", "encoder_hidden_states.out17_3_0_bfp.wts", "onnx::MatMul_11911", "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_bfp.wts", "/transformer_blocks.0/norm1_context/norm/Constant_output_0", "/transformer_blocks.0/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_0_existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_1", "/transformer_blocks.0/norm1/norm/Constant_output_0", "/transformer_blocks.0/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_0_existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_1", "onnx::MatMul_11927_onnx::MatMul_11912", "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_3_existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_4", "onnx::MatMul_11928_onnx::MatMul_11913", "onnx::MatMul_11929_onnx::MatMul_11914", "onnx::MatMul_11937", "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_2_gma", "/transformer_blocks.0/norm2_context/Constant_output_0", "/transformer_blocks.0/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_3_existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_4", "onnx::MatMul_11952", "onnx::MatMul_11953", "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_5_gma", "onnx::MatMul_11938", "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_2_gma", "/transformer_blocks.1/norm1_context/norm/Constant_output_0", "/transformer_blocks.1/norm1_context/norm/Constant_1_output_0", "onnx::MatMul_11940", "onnx::MatMul_11939", "onnx::MatMul_11941", "onnx::MatMul_11949", "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_5_gma", "/transformer_blocks.0/norm2/Constant_output_0", "/transformer_blocks.0/norm2/Constant_1_output_0", "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_6_existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_7", "onnx::MatMul_11950", "onnx::MatMul_11951", "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_8_gma", "/transformer_blocks.1/norm1/norm/Constant_output_0", "/transformer_blocks.1/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_0_existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_1", "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_3_existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_4", "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_0_existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_1", "onnx::MatMul_11969_onnx::MatMul_11954", "onnx::MatMul_11970_onnx::MatMul_11955", "onnx::MatMul_11971_onnx::MatMul_11956", "onnx::MatMul_11980", "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_2_gma", "onnx::MatMul_11979", "onnx::MatMul_11982", "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_2_gma", "/transformer_blocks.1/norm2_context/Constant_output_0", "/transformer_blocks.1/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_3_existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_4", "onnx::MatMul_11994", "onnx::MatMul_11995", "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_5_gma", "/transformer_blocks.2/norm1_context/norm/Constant_output_0", "/transformer_blocks.2/norm1_context/norm/Constant_1_output_0", "onnx::MatMul_11981", "onnx::MatMul_11983", "onnx::MatMul_11991", "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_5_gma", "/transformer_blocks.1/norm2/Constant_output_0", "/transformer_blocks.1/norm2/Constant_1_output_0", "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_6_existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_7", "onnx::MatMul_11992", "onnx::MatMul_11993", "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_8_gma", "/transformer_blocks.2/norm1/norm/Constant_output_0", "/transformer_blocks.2/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_3_existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_4", "onnx::MatMul_12024", "onnx::MatMul_12023", "onnx::MatMul_12025", "onnx::MatMul_12033", "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_0_existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_1", "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_0_existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_1", "onnx::MatMul_12011_onnx::MatMul_11996", "onnx::MatMul_12012_onnx::MatMul_11997", "onnx::MatMul_12013_onnx::MatMul_11998", "onnx::MatMul_12021", "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_2_gma", "onnx::MatMul_12022", "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_2_gma", "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_5_gma", "/transformer_blocks.2/norm2/Constant_output_0", "/transformer_blocks.2/norm2/Constant_1_output_0", "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_6_existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_7", "onnx::MatMul_12034", "onnx::MatMul_12035", "/transformer_blocks.2/norm2_context/Constant_output_0", "/transformer_blocks.2/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_3_existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_4", "onnx::MatMul_12036", "onnx::MatMul_12037", "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_5_gma", "/transformer_blocks.3/norm1_context/norm/Constant_output_0", "/transformer_blocks.3/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_8_gma", "/transformer_blocks.3/norm1/norm/Constant_output_0", "/transformer_blocks.3/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_3_existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_4", "onnx::MatMul_12066", "onnx::MatMul_12065", "onnx::MatMul_12067", "onnx::MatMul_12075", "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_0_existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_1", "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_0_existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_1", "onnx::MatMul_12053_onnx::MatMul_12038", "onnx::MatMul_12054_onnx::MatMul_12039", "onnx::MatMul_12055_onnx::MatMul_12040", "onnx::MatMul_12063", "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_2_gma", "onnx::MatMul_12064", "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_2_gma", "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_5_gma", "/transformer_blocks.3/norm2/Constant_output_0", "/transformer_blocks.3/norm2/Constant_1_output_0", "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_6_existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_7", "onnx::MatMul_12076", "onnx::MatMul_12077", "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_8_gma", "/transformer_blocks.4/norm1/norm/Constant_output_0", "/transformer_blocks.4/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_3_existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_4", "/transformer_blocks.3/norm2_context/Constant_output_0", "/transformer_blocks.3/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_3_existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_4", "onnx::MatMul_12078", "onnx::MatMul_12079", "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_5_gma", "/transformer_blocks.4/norm1_context/norm/Constant_output_0", "/transformer_blocks.4/norm1_context/norm/Constant_1_output_0", "onnx::MatMul_12108", "onnx::MatMul_12107", "onnx::MatMul_12109", "onnx::MatMul_12117", "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_0_existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_1", "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_0_existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_1", "onnx::MatMul_12095_onnx::MatMul_12080", "onnx::MatMul_12096_onnx::MatMul_12081", "onnx::MatMul_12097_onnx::MatMul_12082", "onnx::MatMul_12105", "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_2_gma", "onnx::MatMul_12106", "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_2_gma", "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_5_gma", "/transformer_blocks.4/norm2/Constant_output_0", "/transformer_blocks.4/norm2/Constant_1_output_0", "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_6_existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_7", "onnx::MatMul_12118", "onnx::MatMul_12119", "/transformer_blocks.4/norm2_context/Constant_output_0", "/transformer_blocks.4/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_3_existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_4", "onnx::MatMul_12120", "onnx::MatMul_12121", "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_5_gma", "/transformer_blocks.5/norm1_context/norm/Constant_output_0", "/transformer_blocks.5/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_8_gma", "/transformer_blocks.5/norm1/norm/Constant_output_0", "/transformer_blocks.5/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_3_existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_4", "onnx::MatMul_12150", "onnx::MatMul_12149", "onnx::MatMul_12151", "onnx::MatMul_12159", "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_0_existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_1", "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_0_existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_1", "onnx::MatMul_12137_onnx::MatMul_12122", "onnx::MatMul_12138_onnx::MatMul_12123", "onnx::MatMul_12139_onnx::MatMul_12124", "onnx::MatMul_12147", "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_2_gma", "onnx::MatMul_12148", "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_2_gma", "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_5_gma", "/transformer_blocks.5/norm2/Constant_output_0", "/transformer_blocks.5/norm2/Constant_1_output_0", "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_6_existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_7", "onnx::MatMul_12160", "onnx::MatMul_12161", "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_8_gma", "/transformer_blocks.6/norm1/norm/Constant_output_0", "/transformer_blocks.6/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_3_existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_4", "/transformer_blocks.5/norm2_context/Constant_output_0", "/transformer_blocks.5/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_3_existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_4", "onnx::MatMul_12162", "onnx::MatMul_12163", "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_5_gma", "/transformer_blocks.6/norm1_context/norm/Constant_output_0", "/transformer_blocks.6/norm1_context/norm/Constant_1_output_0", "onnx::MatMul_12192", "onnx::MatMul_12191", "onnx::MatMul_12193", "onnx::MatMul_12201", "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_0_existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_1", "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_0_existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_1", "onnx::MatMul_12179_onnx::MatMul_12164", "onnx::MatMul_12180_onnx::MatMul_12165", "onnx::MatMul_12181_onnx::MatMul_12166", "onnx::MatMul_12189", "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_2_gma", "onnx::MatMul_12190", "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_2_gma", "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_5_gma", "/transformer_blocks.6/norm2/Constant_output_0", "/transformer_blocks.6/norm2/Constant_1_output_0", "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_6_existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_7", "onnx::MatMul_12202", "onnx::MatMul_12203", "/transformer_blocks.6/norm2_context/Constant_output_0", "/transformer_blocks.6/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_3_existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_4", "onnx::MatMul_12204", "onnx::MatMul_12205", "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_5_gma", "/transformer_blocks.7/norm1_context/norm/Constant_output_0", "/transformer_blocks.7/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_8_gma", "/transformer_blocks.7/norm1/norm/Constant_output_0", "/transformer_blocks.7/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_3_existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_4", "onnx::MatMul_12234", "onnx::MatMul_12233", "onnx::MatMul_12235", "onnx::MatMul_12243", "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_0_existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_1", "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_0_existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_1", "onnx::MatMul_12221_onnx::MatMul_12206", "onnx::MatMul_12222_onnx::MatMul_12207", "onnx::MatMul_12223_onnx::MatMul_12208", "onnx::MatMul_12231", "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_2_gma", "onnx::MatMul_12232", "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_2_gma", "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_5_gma", "/transformer_blocks.7/norm2/Constant_output_0", "/transformer_blocks.7/norm2/Constant_1_output_0", "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_6_existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_7", "onnx::MatMul_12244", "onnx::MatMul_12245", "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_8_gma", "/transformer_blocks.8/norm1/norm/Constant_output_0", "/transformer_blocks.8/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_3_existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_4", "/transformer_blocks.7/norm2_context/Constant_output_0", "/transformer_blocks.7/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_3_existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_4", "onnx::MatMul_12246", "onnx::MatMul_12247", "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_5_gma", "/transformer_blocks.8/norm1_context/norm/Constant_output_0", "/transformer_blocks.8/norm1_context/norm/Constant_1_output_0", "onnx::MatMul_12276", "onnx::MatMul_12275", "onnx::MatMul_12277", "onnx::MatMul_12285", "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_0_existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_1", "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_0_existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_1", "onnx::MatMul_12263_onnx::MatMul_12248", "onnx::MatMul_12264_onnx::MatMul_12249", "onnx::MatMul_12265_onnx::MatMul_12250", "onnx::MatMul_12273", "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_2_gma", "onnx::MatMul_12274", "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_2_gma", "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_5_gma", "/transformer_blocks.8/norm2/Constant_output_0", "/transformer_blocks.8/norm2/Constant_1_output_0", "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_6_existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_7", "onnx::MatMul_12286", "onnx::MatMul_12287", "/transformer_blocks.8/norm2_context/Constant_output_0", "/transformer_blocks.8/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_3_existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_4", "onnx::MatMul_12288", "onnx::MatMul_12289", "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_5_gma", "/transformer_blocks.9/norm1_context/norm/Constant_output_0", "/transformer_blocks.9/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_8_gma", "/transformer_blocks.9/norm1/norm/Constant_output_0", "/transformer_blocks.9/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_3_existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_4", "onnx::MatMul_12318", "onnx::MatMul_12317", "onnx::MatMul_12319", "onnx::MatMul_12327", "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_0_existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_1", "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_0_existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_1", "onnx::MatMul_12305_onnx::MatMul_12290", "onnx::MatMul_12306_onnx::MatMul_12291", "onnx::MatMul_12307_onnx::MatMul_12292", "onnx::MatMul_12315", "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_2_gma", "onnx::MatMul_12316", "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_2_gma", "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_5_gma", "/transformer_blocks.9/norm2/Constant_output_0", "/transformer_blocks.9/norm2/Constant_1_output_0", "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_6_existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_7", "onnx::MatMul_12328", "onnx::MatMul_12329", "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_8_gma", "/transformer_blocks.10/norm1/norm/Constant_output_0", "/transformer_blocks.10/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_0_existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_1", "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_3_existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_4", "/transformer_blocks.9/norm2_context/Constant_output_0", "/transformer_blocks.9/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_3_existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_4", "onnx::MatMul_12330", "onnx::MatMul_12331", "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_5_gma", "/transformer_blocks.10/norm1_context/norm/Constant_output_0", "/transformer_blocks.10/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_0_existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_1", "onnx::MatMul_12347_onnx::MatMul_12332", "onnx::MatMul_12348_onnx::MatMul_12333", "onnx::MatMul_12349_onnx::MatMul_12334", "onnx::MatMul_12358", "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_2_gma", "onnx::MatMul_12357", "onnx::MatMul_12360", "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_2_gma", "/transformer_blocks.10/norm2_context/Constant_output_0", "/transformer_blocks.10/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_3_existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_4", "onnx::MatMul_12372", "onnx::MatMul_12373", "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_5_gma", "/transformer_blocks.11/norm1_context/norm/Constant_output_0", "/transformer_blocks.11/norm1_context/norm/Constant_1_output_0", "onnx::MatMul_12359", "onnx::MatMul_12361", "onnx::MatMul_12369", "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_5_gma", "/transformer_blocks.10/norm2/Constant_output_0", "/transformer_blocks.10/norm2/Constant_1_output_0", "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_6_existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_7", "onnx::MatMul_12370", "onnx::MatMul_12371", "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_8_gma", "/transformer_blocks.11/norm1/norm/Constant_output_0", "/transformer_blocks.11/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_3_existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_4", "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_0_existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_1", "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_0_existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_1", "onnx::MatMul_12389_onnx::MatMul_12374", "onnx::MatMul_12390_onnx::MatMul_12375", "onnx::MatMul_12391_onnx::MatMul_12376", "onnx::MatMul_12400", "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_2_gma", "onnx::MatMul_12399", "onnx::MatMul_12402", "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_2_gma", "/transformer_blocks.11/norm2_context/Constant_output_0", "/transformer_blocks.11/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_3_existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_4", "onnx::MatMul_12414", "onnx::MatMul_12415", "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_5_gma", "/transformer_blocks.12/norm1_context/norm/Constant_output_0", "/transformer_blocks.12/norm1_context/norm/Constant_1_output_0", "onnx::MatMul_12401", "onnx::MatMul_12403", "onnx::MatMul_12411", "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_5_gma", "/transformer_blocks.11/norm2/Constant_output_0", "/transformer_blocks.11/norm2/Constant_1_output_0", "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_6_existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_7", "onnx::MatMul_12412", "onnx::MatMul_12413", "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_8_gma", "/transformer_blocks.12/norm1/norm/Constant_output_0", "/transformer_blocks.12/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_3_existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_4", "onnx::MatMul_12444", "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_0_existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_1", "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_0_existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_1", "onnx::MatMul_12431_onnx::MatMul_12416", "onnx::MatMul_12432_onnx::MatMul_12417", "onnx::MatMul_12433_onnx::MatMul_12418", "onnx::MatMul_12442", "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_2_gma", "onnx::MatMul_12441", "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_2_gma", "/transformer_blocks.12/norm2_context/Constant_output_0", "/transformer_blocks.12/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_3_existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_4", "onnx::MatMul_12456", "onnx::MatMul_12457", "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_5_gma", "/transformer_blocks.13/norm1_context/norm/Constant_output_0", "/transformer_blocks.13/norm1_context/norm/Constant_1_output_0", "onnx::MatMul_12443", "onnx::MatMul_12445", "onnx::MatMul_12453", "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_5_gma", "/transformer_blocks.12/norm2/Constant_output_0", "/transformer_blocks.12/norm2/Constant_1_output_0", "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_6_existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_7", "onnx::MatMul_12454", "onnx::MatMul_12455", "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_8_gma", "/transformer_blocks.13/norm1/norm/Constant_output_0", "/transformer_blocks.13/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_0_existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_1", "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_0_existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_1", "onnx::MatMul_12473_onnx::MatMul_12458", "onnx::MatMul_12474_onnx::MatMul_12459", "onnx::MatMul_12475_onnx::MatMul_12460", "onnx::MatMul_12484", "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_2_gma", "/transformer_blocks.13/norm2/Constant_output_0", "/transformer_blocks.13/norm2/Constant_1_output_0", "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_3_existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_4", "onnx::MatMul_12485", "onnx::MatMul_12486", "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_5_gma", "onnx::MatMul_12483", "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_2_gma", "/transformer_blocks.13/norm2_context/Constant_output_0", "/transformer_blocks.13/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_3_existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_4", "onnx::MatMul_12487", "onnx::MatMul_12488", "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_5_gma", "/transformer_blocks.14/norm1/norm/Constant_output_0", "/transformer_blocks.14/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_0_existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_1", "/transformer_blocks.14/norm1_context/norm/Constant_output_0", "/transformer_blocks.14/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_0_existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_1", "onnx::MatMul_12504_onnx::MatMul_12489", "onnx::MatMul_12505_onnx::MatMul_12490", "onnx::MatMul_12506_onnx::MatMul_12491", "onnx::MatMul_12515", "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_2_gma", "/transformer_blocks.14/norm2/Constant_output_0", "/transformer_blocks.14/norm2/Constant_1_output_0", "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_3_existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_4", "onnx::MatMul_12516", "onnx::MatMul_12517", "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_5_gma", "onnx::MatMul_12514", "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_2_gma", "/transformer_blocks.14/norm2_context/Constant_output_0", "/transformer_blocks.14/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_3_existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_4", "onnx::MatMul_12518", "onnx::MatMul_12519", "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_5_gma", "/transformer_blocks.15/norm1_context/norm/Constant_output_0", "/transformer_blocks.15/norm1_context/norm/Constant_1_output_0", "/transformer_blocks.15/norm1/norm/Constant_output_0", "/transformer_blocks.15/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_0_existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_1", "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_0_existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_1", "onnx::MatMul_12535_onnx::MatMul_12520", "onnx::MatMul_12536_onnx::MatMul_12521", "onnx::MatMul_12537_onnx::MatMul_12522", "onnx::MatMul_12546", "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_2_gma", "/transformer_blocks.15/norm2/Constant_output_0", "/transformer_blocks.15/norm2/Constant_1_output_0", "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_3_existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_4", "onnx::MatMul_12547", "onnx::MatMul_12548", "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_5_gma", "onnx::MatMul_12545", "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_2_gma", "/transformer_blocks.15/norm2_context/Constant_output_0", "/transformer_blocks.15/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_3_existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_4", "onnx::MatMul_12549", "onnx::MatMul_12550", "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_5_gma", "/transformer_blocks.16/norm1/norm/Constant_output_0", "/transformer_blocks.16/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_0_existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_1", "/transformer_blocks.16/norm1_context/norm/Constant_output_0", "/transformer_blocks.16/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_0_existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_1", "onnx::MatMul_12566_onnx::MatMul_12551", "onnx::MatMul_12567_onnx::MatMul_12552", "onnx::MatMul_12568_onnx::MatMul_12553", "onnx::MatMul_12577", "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_2_gma", "/transformer_blocks.16/norm2/Constant_output_0", "/transformer_blocks.16/norm2/Constant_1_output_0", "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_3_existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_4", "onnx::MatMul_12578", "onnx::MatMul_12579", "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_5_gma", "onnx::MatMul_12576", "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_2_gma", "/transformer_blocks.16/norm2_context/Constant_output_0", "/transformer_blocks.16/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_3_existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_4", "onnx::MatMul_12580", "onnx::MatMul_12581", "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_5_gma", "/transformer_blocks.17/norm1_context/norm/Constant_output_0", "/transformer_blocks.17/norm1_context/norm/Constant_1_output_0", "/transformer_blocks.17/norm1/norm/Constant_output_0", "/transformer_blocks.17/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_0_existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_1", "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_0_existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_1", "onnx::MatMul_12597_onnx::MatMul_12582", "onnx::MatMul_12598_onnx::MatMul_12583", "onnx::MatMul_12599_onnx::MatMul_12584", "onnx::MatMul_12608", "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_2_gma", "/transformer_blocks.17/norm2/Constant_output_0", "/transformer_blocks.17/norm2/Constant_1_output_0", "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_3_existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_4", "onnx::MatMul_12609", "onnx::MatMul_12610", "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_5_gma", "onnx::MatMul_12607", "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_2_gma", "/transformer_blocks.17/norm2_context/Constant_output_0", "/transformer_blocks.17/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_3_existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_4", "onnx::MatMul_12611", "onnx::MatMul_12612", "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_5_gma", "/transformer_blocks.18/norm1/norm/Constant_output_0", "/transformer_blocks.18/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_0_existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_1", "/transformer_blocks.18/norm1_context/norm/Constant_output_0", "/transformer_blocks.18/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_0_existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_1", "onnx::MatMul_12628_onnx::MatMul_12613", "onnx::MatMul_12629_onnx::MatMul_12614", "onnx::MatMul_12630_onnx::MatMul_12615", "onnx::MatMul_12639", "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_2_gma", "/transformer_blocks.18/norm2/Constant_output_0", "/transformer_blocks.18/norm2/Constant_1_output_0", "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_3_existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_4", "onnx::MatMul_12640", "onnx::MatMul_12641", "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_5_gma", "onnx::MatMul_12638", "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_2_gma", "/transformer_blocks.18/norm2_context/Constant_output_0", "/transformer_blocks.18/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_3_existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_4", "onnx::MatMul_12642", "onnx::MatMul_12643", "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_5_gma", "/transformer_blocks.19/norm1_context/norm/Constant_output_0", "/transformer_blocks.19/norm1_context/norm/Constant_1_output_0", "/transformer_blocks.19/norm1/norm/Constant_output_0", "/transformer_blocks.19/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_0_existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_1", "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_0_existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_1", "onnx::MatMul_12659_onnx::MatMul_12644", "onnx::MatMul_12660_onnx::MatMul_12645", "onnx::MatMul_12661_onnx::MatMul_12646", "onnx::MatMul_12670", "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_2_gma", "/transformer_blocks.19/norm2/Constant_output_0", "/transformer_blocks.19/norm2/Constant_1_output_0", "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_3_existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_4", "onnx::MatMul_12671", "onnx::MatMul_12672", "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_5_gma", "onnx::MatMul_12669", "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_2_gma", "/transformer_blocks.19/norm2_context/Constant_output_0", "/transformer_blocks.19/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_3_existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_4", "onnx::MatMul_12673", "onnx::MatMul_12674", "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_5_gma", "/transformer_blocks.20/norm1/norm/Constant_output_0", "/transformer_blocks.20/norm1/norm/Constant_1_output_0", "/transformer_blocks.20/norm1_context/norm/Constant_output_0", "/transformer_blocks.20/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_0_existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_1", "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_0_existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_1", "onnx::MatMul_12690_onnx::MatMul_12675", "onnx::MatMul_12691_onnx::MatMul_12676", "onnx::MatMul_12692_onnx::MatMul_12677", "onnx::MatMul_12700", "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_2_gma", "onnx::MatMul_12701", "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_2_gma", "/transformer_blocks.20/norm2/Constant_output_0", "/transformer_blocks.20/norm2/Constant_1_output_0", "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_3_existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_4", "onnx::MatMul_12702", "onnx::MatMul_12703", "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_5_gma", "/transformer_blocks.20/norm2_context/Constant_output_0", "/transformer_blocks.20/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_3_existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_4", "onnx::MatMul_12704", "onnx::MatMul_12705", "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_5_gma", "/transformer_blocks.21/norm1_context/norm/Constant_output_0", "/transformer_blocks.21/norm1_context/norm/Constant_1_output_0", "/transformer_blocks.21/norm1/norm/Constant_output_0", "/transformer_blocks.21/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_0_existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_1", "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_0_existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_1", "onnx::MatMul_12721_onnx::MatMul_12706", "onnx::MatMul_12722_onnx::MatMul_12707", "onnx::MatMul_12723_onnx::MatMul_12708", "onnx::MatMul_12732", "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_2_gma", "/transformer_blocks.21/norm2/Constant_output_0", "/transformer_blocks.21/norm2/Constant_1_output_0", "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_3_existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_4", "onnx::MatMul_12733", "onnx::MatMul_12734", "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_5_gma", "onnx::MatMul_12731", "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_2_gma", "/transformer_blocks.21/norm2_context/Constant_output_0", "/transformer_blocks.21/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_3_existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_4", "onnx::MatMul_12735", "onnx::MatMul_12736", "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_5_gma", "/transformer_blocks.22/norm1/norm/Constant_output_0", "/transformer_blocks.22/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_0_existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_1", "/transformer_blocks.22/norm1_context/norm/Constant_output_0", "/transformer_blocks.22/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_0_existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_1", "onnx::MatMul_12752_onnx::MatMul_12737", "onnx::MatMul_12753_onnx::MatMul_12738", "onnx::MatMul_12754_onnx::MatMul_12739", "onnx::MatMul_12763", "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_2_gma", "/transformer_blocks.22/norm2/Constant_output_0", "/transformer_blocks.22/norm2/Constant_1_output_0", "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_3_existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_4", "onnx::MatMul_12764", "onnx::MatMul_12765", "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_5_gma", "onnx::MatMul_12762", "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_2_gma", "/transformer_blocks.22/norm2_context/Constant_output_0", "/transformer_blocks.22/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_3_existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_4", "onnx::MatMul_12766", "onnx::MatMul_12767", "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_5_gma", "/transformer_blocks.23/norm1_context/norm/Constant_output_0", "/transformer_blocks.23/norm1_context/norm/Constant_1_output_0", "/transformer_blocks.23/norm1/norm/Constant_output_0", "/transformer_blocks.23/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_0_existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_1", "existing_model.transformer_blocks.23.norm1_context.linear.weight_5_1_51_27_47_0_existing_model.transformer_blocks.23.norm1_context.linear.weight_5_1_51_27_47_1", "onnx::MatMul_12783_onnx::MatMul_12768", "onnx::MatMul_12784_onnx::MatMul_12769", "onnx::MatMul_12785_onnx::MatMul_12770", "onnx::MatMul_12790", "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_2_gma", "/transformer_blocks.23/norm2/Constant_output_0", "/transformer_blocks.23/norm2/Constant_1_output_0", "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_3_existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_4", "onnx::MatMul_12791", "onnx::MatMul_12792", "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_5_gma", "/norm_out/norm/Constant_output_0", "/norm_out/norm/Constant_1_output_0", "existing_model.norm_out.linear.weight_5_1_52_27_48_0_existing_model.norm_out.linear.weight_5_1_52_27_48_1", "/norm_out/Add_2_output_0.out0_0_108_bfp.out1_105_bfp.wts", "onnx::MatMul_12793" ] }, "super_instr": { "buffer_size": 0, "xrt_arg_id": 4, "packed_tensors": [] } }, "tensor_map": { "hidden_states_nhwc.out5_0_0": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1, 16 ], "size_in_bytes": 32, "op_tensor_size": 32, "dynamic_shapes": [ "batch_2", "w", "h", "False" ], "offset": 0 }, "/pos_embed/Reshape_1_output_0.out_35_1_2": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "False", "state_dim1", "False" ], "offset": 32 }, "/time_text_embed/Cast_output_0.out17_3_3": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 256 ], "size_in_bytes": 512, "op_tensor_size": 512, "dynamic_shapes": [ "batch_2", "False" ], "offset": 3104 }, "pooled_projections.out17_3_1": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 2048 ], "size_in_bytes": 4096, "op_tensor_size": 4096, "dynamic_shapes": [ "batch_2", "False" ], "offset": 3616 }, "encoder_hidden_states.out17_3_0": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 4096 ], "size_in_bytes": 8192, "op_tensor_size": 8192, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 7712 }, "block_controlnet_hidden_states_0.out_35_1_4": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 18976 }, "block_controlnet_hidden_states_1.out_35_1_6": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 25120 }, "block_controlnet_hidden_states_2.out_35_1_8": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 31264 }, "block_controlnet_hidden_states_3.out_35_1_10": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 37408 }, "block_controlnet_hidden_states_4.out_35_1_12": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 43552 }, "block_controlnet_hidden_states_5.out_35_1_14": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 49696 }, "block_controlnet_hidden_states_6.out_35_1_16": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 55840 }, "block_controlnet_hidden_states_7.out_35_1_18": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 61984 }, "block_controlnet_hidden_states_8.out_35_1_20": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 68128 }, "block_controlnet_hidden_states_9.out_35_1_22": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 74272 }, "block_controlnet_hidden_states_10.out_35_1_24": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 80416 }, "block_controlnet_hidden_states_11.out_35_1_26": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 83488 }, "/Reshape_output_0.out17_0_122": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 1, 2, 2, 16 ], "size_in_bytes": 128, "op_tensor_size": 128, "dynamic_shapes": [ "batch_2", "floor(w/2)", "floor(h/2)", "False", "False", "False" ], "offset": 0 }, "/pos_embed/Transpose_output_0.out5_0_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 0 }, "/pos_embed/Add_2_output_0.out_35_1_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 3072 }, "/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "False" ], "offset": 6144 }, "/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "False" ], "offset": 9216 }, "/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "False" ], "offset": 12288 }, "/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "False" ], "offset": 15360 }, "/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "False" ], "offset": 18432 }, "/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "False" ], "offset": 21504 }, "/time_text_embed/Add_output_0.out_35_1_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "False" ], "offset": 24576 }, "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "False" ], "offset": 27648 }, "encoder_hidden_states.out17_3_0_bfp.out25_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 4096 ], "size_in_bytes": 4096, "op_tensor_size": 4096, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 30720 }, "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 34816 }, "/context_embedder/Add_output_0.out17_3_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 36352 }, "/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 39424 }, "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 40960 }, "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 42496 }, "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_2_bfp.out1_106": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 44032 }, "/transformer_blocks.0/attn/Concat_6_output_0_16_0.out18_1_0_bfp.out19_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 45568 }, "/transformer_blocks.0/norm1/Add_4_output_0.out0_0_1_bfp.out1_107": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 47104 }, "/transformer_blocks.0/attn/Concat_7_16_0.out18_1_0_bfp.out19_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 48640 }, "/transformer_blocks.0/attn/Concat_8_3d.out18_1_0_bfp.out23_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 50176 }, "/transformer_blocks.0/attn/Reshape_6_output_0.out18_1_0_bfp.out27_0_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 51712 }, "/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 53248 }, "/transformer_blocks.0/Add_5_output_0.out10_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 54784 }, "/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 57856 }, "/transformer_blocks.0/Add_7_output_0.out0_0_4_bfp.out1_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 59392 }, "/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 60928 }, "/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_10_bfp.out25_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 67072 }, "/transformer_blocks.0/Add_8_output_0.out10_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 68608 }, "/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 71680 }, "/transformer_blocks.0/Add_output_0.out10_104": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 73216 }, "/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_4_bfp.out15_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 76288 }, "/transformer_blocks.0/attn2/Reshape_1_output_0.out17_0_4_bfp.out21_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 77824 }, "/transformer_blocks.0/attn2/Reshape_output_0.out17_0_5_bfp.out21_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 79360 }, "/transformer_blocks.0/attn2/to_v/Add_output_0.out17_3_7_bfp.out25_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 80896 }, "/transformer_blocks.0/attn2/Reshape_3_output_0.out20_0_bfp.out27_0_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 82432 }, "/transformer_blocks.0/attn2/to_out.0/Add_output_0.out17_3_8_bfp.out25_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 83968 }, "/transformer_blocks.0/Add_1_output_0.out10_105": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 85504 }, "/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 88576 }, "/transformer_blocks.0/Add_3_output_0.out0_0_3_bfp.out1_108": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 90112 }, "/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 91648 }, "/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_9_bfp.out25_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 97792 }, "/transformer_blocks.0/Add_4_output_0.out10_106": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 99328 }, "/Add_output_0.out_35_1_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 102400 }, "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_5_bfp.out15_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 105472 }, "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_7_bfp.out1_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 107008 }, "/transformer_blocks.1/norm1/Add_4_output_0.out0_0_6_bfp.out1_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 108544 }, "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 110080 }, "/transformer_blocks.1/attn/Concat_6_output_0_16_1.out18_1_1_bfp.out19_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 111616 }, "/transformer_blocks.1/attn/Concat_7_16_1.out18_1_1_bfp.out19_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 113152 }, "/transformer_blocks.1/attn/Concat_8_3d.out18_1_1_bfp.out23_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 114688 }, "/transformer_blocks.1/attn/Reshape_6_output_0.out18_1_1_bfp.out27_0_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 116224 }, "/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 117760 }, "/transformer_blocks.1/Add_output_0.out10_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 119296 }, "/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 122368 }, "/transformer_blocks.1/attn2/Reshape_1_output_0.out17_0_10_bfp.out21_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 123904 }, "/transformer_blocks.1/Add_5_output_0.out10_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 125440 }, "/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 128512 }, "/transformer_blocks.1/Add_7_output_0.out0_0_9_bfp.out1_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 130048 }, "/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 131584 }, "/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_16_bfp.out25_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 137728 }, "/transformer_blocks.1/Add_8_output_0.out10_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 139264 }, "/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_8_bfp.out15_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 142336 }, "/transformer_blocks.1/attn2/Reshape_output_0.out17_0_11_bfp.out21_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 143872 }, "/transformer_blocks.1/attn2/to_v/Add_output_0.out17_3_13_bfp.out25_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 145408 }, "/transformer_blocks.1/attn2/Reshape_3_output_0.out20_1_bfp.out27_0_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 146944 }, "/transformer_blocks.1/attn2/to_out.0/Add_output_0.out17_3_14_bfp.out25_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 148480 }, "/transformer_blocks.1/Add_1_output_0.out10_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 150016 }, "/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 153088 }, "/transformer_blocks.1/Add_3_output_0.out0_0_8_bfp.out1_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 154624 }, "/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 156160 }, "/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_15_bfp.out25_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 162304 }, "/transformer_blocks.1/Add_4_output_0.out10_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 163840 }, "/Add_1_output_0.out_35_1_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 166912 }, "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_9_bfp.out15_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 169984 }, "/transformer_blocks.2/norm1/Add_4_output_0.out0_0_11_bfp.out1_51": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 171520 }, "/transformer_blocks.2/attn2/Reshape_1_output_0.out17_0_16_bfp.out21_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 173056 }, "/transformer_blocks.2/attn2/Reshape_output_0.out17_0_17_bfp.out21_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 174592 }, "/transformer_blocks.2/attn2/to_v/Add_output_0.out17_3_19_bfp.out25_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 176128 }, "/transformer_blocks.2/attn2/Reshape_3_output_0.out20_2_bfp.out27_0_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 177664 }, "/transformer_blocks.2/attn2/to_out.0/Add_output_0.out17_3_20_bfp.out25_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 179200 }, "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_12_bfp.out1_50": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 180736 }, "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_10_bfp.out1_53": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 182272 }, "/transformer_blocks.2/attn/Concat_6_output_0_16_2.out18_1_2_bfp.out19_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 183808 }, "/transformer_blocks.2/attn/Concat_7_16_2.out18_1_2_bfp.out19_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 185344 }, "/transformer_blocks.2/attn/Concat_8_3d.out18_1_2_bfp.out23_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 186880 }, "/transformer_blocks.2/attn/Reshape_6_output_0.out18_1_2_bfp.out27_0_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 188416 }, "/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 189952 }, "/transformer_blocks.2/Add_5_output_0.out10_53": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 191488 }, "/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 194560 }, "/transformer_blocks.2/Add_output_0.out10_50": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 196096 }, "/transformer_blocks.2/Add_1_output_0.out10_51": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 199168 }, "/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 202240 }, "/transformer_blocks.2/Add_3_output_0.out0_0_13_bfp.out1_52": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 203776 }, "/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 205312 }, "/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_21_bfp.out25_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 211456 }, "/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 212992 }, "/transformer_blocks.2/Add_7_output_0.out0_0_14_bfp.out1_54": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 214528 }, "/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 216064 }, "/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_22_bfp.out25_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 222208 }, "/transformer_blocks.2/Add_8_output_0.out10_54": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 223744 }, "/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_12_bfp.out15_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 226816 }, "/transformer_blocks.2/Add_4_output_0.out10_52": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 228352 }, "/Add_2_output_0.out_35_1_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 231424 }, "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_13_bfp.out15_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 234496 }, "/transformer_blocks.3/norm1/Add_4_output_0.out0_0_16_bfp.out1_71": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 236032 }, "/transformer_blocks.3/attn2/Reshape_1_output_0.out17_0_22_bfp.out21_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 237568 }, "/transformer_blocks.3/attn2/Reshape_output_0.out17_0_23_bfp.out21_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 239104 }, "/transformer_blocks.3/attn2/to_v/Add_output_0.out17_3_25_bfp.out25_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 240640 }, "/transformer_blocks.3/attn2/Reshape_3_output_0.out20_3_bfp.out27_0_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 242176 }, "/transformer_blocks.3/attn2/to_out.0/Add_output_0.out17_3_26_bfp.out25_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 243712 }, "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_17_bfp.out1_70": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 245248 }, "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_15_bfp.out1_73": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 246784 }, "/transformer_blocks.3/attn/Concat_6_output_0_16_3.out18_1_3_bfp.out19_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 248320 }, "/transformer_blocks.3/attn/Concat_7_16_3.out18_1_3_bfp.out19_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 249856 }, "/transformer_blocks.3/attn/Concat_8_3d.out18_1_3_bfp.out23_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 251392 }, "/transformer_blocks.3/attn/Reshape_6_output_0.out18_1_3_bfp.out27_0_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 252928 }, "/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 254464 }, "/transformer_blocks.3/Add_5_output_0.out10_72": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 256000 }, "/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 259072 }, "/transformer_blocks.3/Add_output_0.out10_69": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 260608 }, "/transformer_blocks.3/Add_1_output_0.out10_70": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 263680 }, "/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 266752 }, "/transformer_blocks.3/Add_3_output_0.out0_0_18_bfp.out1_72": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 268288 }, "/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 269824 }, "/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_27_bfp.out25_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 275968 }, "/transformer_blocks.3/Add_4_output_0.out10_71": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 277504 }, "/Add_3_output_0.out_35_1_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 280576 }, "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_17_bfp.out15_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 283648 }, "/transformer_blocks.4/norm1/Add_4_output_0.out0_0_21_bfp.out1_76": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 285184 }, "/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 286720 }, "/transformer_blocks.3/Add_7_output_0.out0_0_19_bfp.out1_74": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 288256 }, "/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 289792 }, "/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_28_bfp.out25_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 295936 }, "/transformer_blocks.3/Add_8_output_0.out10_73": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 297472 }, "/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_16_bfp.out15_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 300544 }, "/transformer_blocks.4/attn2/Reshape_1_output_0.out17_0_28_bfp.out21_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 302080 }, "/transformer_blocks.4/attn2/Reshape_output_0.out17_0_29_bfp.out21_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 303616 }, "/transformer_blocks.4/attn2/to_v/Add_output_0.out17_3_31_bfp.out25_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 305152 }, "/transformer_blocks.4/attn2/Reshape_3_output_0.out20_4_bfp.out27_0_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 306688 }, "/transformer_blocks.4/attn2/to_out.0/Add_output_0.out17_3_32_bfp.out25_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 308224 }, "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_22_bfp.out1_75": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 309760 }, "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_20_bfp.out1_78": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 311296 }, "/transformer_blocks.4/attn/Concat_6_output_0_16_4.out18_1_4_bfp.out19_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 312832 }, "/transformer_blocks.4/attn/Concat_7_16_4.out18_1_4_bfp.out19_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 314368 }, "/transformer_blocks.4/attn/Concat_8_3d.out18_1_4_bfp.out23_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 315904 }, "/transformer_blocks.4/attn/Reshape_6_output_0.out18_1_4_bfp.out27_0_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 317440 }, "/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 318976 }, "/transformer_blocks.4/Add_5_output_0.out10_77": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 320512 }, "/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 323584 }, "/transformer_blocks.4/Add_output_0.out10_74": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 325120 }, "/transformer_blocks.4/Add_1_output_0.out10_75": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 328192 }, "/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 331264 }, "/transformer_blocks.4/Add_3_output_0.out0_0_23_bfp.out1_77": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 332800 }, "/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 334336 }, "/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_33_bfp.out25_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 340480 }, "/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 342016 }, "/transformer_blocks.4/Add_7_output_0.out0_0_24_bfp.out1_79": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 343552 }, "/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 345088 }, "/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_34_bfp.out25_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 351232 }, "/transformer_blocks.4/Add_8_output_0.out10_78": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 352768 }, "/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_20_bfp.out15_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 355840 }, "/transformer_blocks.4/Add_4_output_0.out10_76": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 357376 }, "/Add_4_output_0.out_35_1_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 360448 }, "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_21_bfp.out15_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 363520 }, "/transformer_blocks.5/norm1/Add_4_output_0.out0_0_26_bfp.out1_81": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 365056 }, "/transformer_blocks.5/attn2/Reshape_1_output_0.out17_0_34_bfp.out21_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 366592 }, "/transformer_blocks.5/attn2/Reshape_output_0.out17_0_35_bfp.out21_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 368128 }, "/transformer_blocks.5/attn2/to_v/Add_output_0.out17_3_37_bfp.out25_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 369664 }, "/transformer_blocks.5/attn2/Reshape_3_output_0.out20_5_bfp.out27_0_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 371200 }, "/transformer_blocks.5/attn2/to_out.0/Add_output_0.out17_3_38_bfp.out25_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 372736 }, "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_27_bfp.out1_80": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 374272 }, "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_25_bfp.out1_83": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 375808 }, "/transformer_blocks.5/attn/Concat_6_output_0_16_5.out18_1_5_bfp.out19_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 377344 }, "/transformer_blocks.5/attn/Concat_7_16_5.out18_1_5_bfp.out19_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 378880 }, "/transformer_blocks.5/attn/Concat_8_3d.out18_1_5_bfp.out23_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 380416 }, "/transformer_blocks.5/attn/Reshape_6_output_0.out18_1_5_bfp.out27_0_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 381952 }, "/transformer_blocks.5/attn/to_add_out/Add_output_0.out6_1_11_bfp.out7_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 383488 }, "/transformer_blocks.5/Add_5_output_0.out10_82": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 385024 }, "/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 388096 }, "/transformer_blocks.5/Add_output_0.out10_79": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 389632 }, "/transformer_blocks.5/Add_1_output_0.out10_80": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 392704 }, "/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 395776 }, "/transformer_blocks.5/Add_3_output_0.out0_0_28_bfp.out1_82": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 397312 }, "/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 398848 }, "/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_39_bfp.out25_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 404992 }, "/transformer_blocks.5/Add_4_output_0.out10_81": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 406528 }, "/Add_5_output_0.out_35_1_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 409600 }, "/transformer_blocks.6/norm1/norm/LayerNormalization_output_0.out14_25_bfp.out15_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 412672 }, "/transformer_blocks.6/norm1/Add_4_output_0.out0_0_31_bfp.out1_86": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 414208 }, "/transformer_blocks.5/norm2_context/LayerNormalization_output_0.out14_23_bfp.out15_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 415744 }, "/transformer_blocks.5/Add_7_output_0.out0_0_29_bfp.out1_84": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 417280 }, "/transformer_blocks.5/ff_context/net.0/Mul_5_output_0.out17_2_11_bfp.out25_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 418816 }, "/transformer_blocks.5/ff_context/net.2/Add_output_0.out17_3_40_bfp.out25_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 424960 }, "/transformer_blocks.5/Add_8_output_0.out10_83": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 426496 }, "/transformer_blocks.6/norm1_context/norm/LayerNormalization_output_0.out14_24_bfp.out15_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 429568 }, "/transformer_blocks.6/attn2/Reshape_1_output_0.out17_0_40_bfp.out21_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 431104 }, "/transformer_blocks.6/attn2/Reshape_output_0.out17_0_41_bfp.out21_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 432640 }, "/transformer_blocks.6/attn2/to_v/Add_output_0.out17_3_43_bfp.out25_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 434176 }, "/transformer_blocks.6/attn2/Reshape_3_output_0.out20_6_bfp.out27_0_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 435712 }, "/transformer_blocks.6/attn2/to_out.0/Add_output_0.out17_3_44_bfp.out25_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 437248 }, "/transformer_blocks.6/norm1/Add_2_output_0.out0_0_32_bfp.out1_85": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 438784 }, "/transformer_blocks.6/norm1_context/Add_2_output_0.out0_0_30_bfp.out1_88": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 440320 }, "/transformer_blocks.6/attn/Concat_6_output_0_16_6.out18_1_6_bfp.out19_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 441856 }, "/transformer_blocks.6/attn/Concat_7_16_6.out18_1_6_bfp.out19_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 443392 }, "/transformer_blocks.6/attn/Concat_8_3d.out18_1_6_bfp.out23_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 444928 }, "/transformer_blocks.6/attn/Reshape_6_output_0.out18_1_6_bfp.out27_0_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 446464 }, "/transformer_blocks.6/attn/to_add_out/Add_output_0.out6_1_13_bfp.out7_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 448000 }, "/transformer_blocks.6/Add_5_output_0.out10_87": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 449536 }, "/transformer_blocks.6/attn/to_out.0/Add_output_0.out6_1_12_bfp.out7_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 452608 }, "/transformer_blocks.6/Add_output_0.out10_84": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 454144 }, "/transformer_blocks.6/Add_1_output_0.out10_85": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 457216 }, "/transformer_blocks.6/norm2/LayerNormalization_output_0.out14_26_bfp.out15_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 460288 }, "/transformer_blocks.6/Add_3_output_0.out0_0_33_bfp.out1_87": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 461824 }, "/transformer_blocks.6/ff/net.0/Mul_5_output_0.out17_2_12_bfp.out25_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 463360 }, "/transformer_blocks.6/ff/net.2/Add_output_0.out17_3_45_bfp.out25_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 469504 }, "/transformer_blocks.6/norm2_context/LayerNormalization_output_0.out14_27_bfp.out15_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 471040 }, "/transformer_blocks.6/Add_7_output_0.out0_0_34_bfp.out1_89": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 472576 }, "/transformer_blocks.6/ff_context/net.0/Mul_5_output_0.out17_2_13_bfp.out25_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 474112 }, "/transformer_blocks.6/ff_context/net.2/Add_output_0.out17_3_46_bfp.out25_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 480256 }, "/transformer_blocks.6/Add_8_output_0.out10_88": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 481792 }, "/transformer_blocks.7/norm1_context/norm/LayerNormalization_output_0.out14_28_bfp.out15_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 484864 }, "/transformer_blocks.6/Add_4_output_0.out10_86": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 486400 }, "/Add_6_output_0.out_35_1_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 489472 }, "/transformer_blocks.7/norm1/norm/LayerNormalization_output_0.out14_29_bfp.out15_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 492544 }, "/transformer_blocks.7/norm1/Add_4_output_0.out0_0_36_bfp.out1_91": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 494080 }, "/transformer_blocks.7/attn2/Reshape_1_output_0.out17_0_46_bfp.out21_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 495616 }, "/transformer_blocks.7/attn2/Reshape_output_0.out17_0_47_bfp.out21_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 497152 }, "/transformer_blocks.7/attn2/to_v/Add_output_0.out17_3_49_bfp.out25_47": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 498688 }, "/transformer_blocks.7/attn2/Reshape_3_output_0.out20_7_bfp.out27_0_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 500224 }, "/transformer_blocks.7/attn2/to_out.0/Add_output_0.out17_3_50_bfp.out25_48": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 501760 }, "/transformer_blocks.7/norm1/Add_2_output_0.out0_0_37_bfp.out1_90": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 503296 }, "/transformer_blocks.7/norm1_context/Add_2_output_0.out0_0_35_bfp.out1_93": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 504832 }, "/transformer_blocks.7/attn/Concat_6_output_0_16_7.out18_1_7_bfp.out19_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 506368 }, "/transformer_blocks.7/attn/Concat_7_16_7.out18_1_7_bfp.out19_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 507904 }, "/transformer_blocks.7/attn/Concat_8_3d.out18_1_7_bfp.out23_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 509440 }, "/transformer_blocks.7/attn/Reshape_6_output_0.out18_1_7_bfp.out27_0_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 510976 }, "/transformer_blocks.7/attn/to_add_out/Add_output_0.out6_1_15_bfp.out7_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 512512 }, "/transformer_blocks.7/Add_5_output_0.out10_92": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 514048 }, "/transformer_blocks.7/attn/to_out.0/Add_output_0.out6_1_14_bfp.out7_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 517120 }, "/transformer_blocks.7/Add_output_0.out10_89": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 518656 }, "/transformer_blocks.7/Add_1_output_0.out10_90": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 521728 }, "/transformer_blocks.7/norm2/LayerNormalization_output_0.out14_30_bfp.out15_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 524800 }, "/transformer_blocks.7/Add_3_output_0.out0_0_38_bfp.out1_92": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 526336 }, "/transformer_blocks.7/ff/net.0/Mul_5_output_0.out17_2_14_bfp.out25_49": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 527872 }, "/transformer_blocks.7/ff/net.2/Add_output_0.out17_3_51_bfp.out25_51": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 534016 }, "/transformer_blocks.7/Add_4_output_0.out10_91": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 535552 }, "/Add_7_output_0.out_35_1_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 538624 }, "/transformer_blocks.8/norm1/norm/LayerNormalization_output_0.out14_33_bfp.out15_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 541696 }, "/transformer_blocks.8/norm1/Add_4_output_0.out0_0_41_bfp.out1_96": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 543232 }, "/transformer_blocks.7/norm2_context/LayerNormalization_output_0.out14_31_bfp.out15_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 544768 }, "/transformer_blocks.7/Add_7_output_0.out0_0_39_bfp.out1_94": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 546304 }, "/transformer_blocks.7/ff_context/net.0/Mul_5_output_0.out17_2_15_bfp.out25_50": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 547840 }, "/transformer_blocks.7/ff_context/net.2/Add_output_0.out17_3_52_bfp.out25_52": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 553984 }, "/transformer_blocks.7/Add_8_output_0.out10_93": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 555520 }, "/transformer_blocks.8/norm1_context/norm/LayerNormalization_output_0.out14_32_bfp.out15_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 558592 }, "/transformer_blocks.8/attn2/Reshape_1_output_0.out17_0_52_bfp.out21_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 560128 }, "/transformer_blocks.8/attn2/Reshape_output_0.out17_0_53_bfp.out21_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 561664 }, "/transformer_blocks.8/attn2/to_v/Add_output_0.out17_3_55_bfp.out25_53": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 563200 }, "/transformer_blocks.8/attn2/Reshape_3_output_0.out20_8_bfp.out27_0_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 564736 }, "/transformer_blocks.8/attn2/to_out.0/Add_output_0.out17_3_56_bfp.out25_54": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 566272 }, "/transformer_blocks.8/norm1/Add_2_output_0.out0_0_42_bfp.out1_95": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 567808 }, "/transformer_blocks.8/norm1_context/Add_2_output_0.out0_0_40_bfp.out1_98": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 569344 }, "/transformer_blocks.8/attn/Concat_6_output_0_16_8.out18_1_8_bfp.out19_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 570880 }, "/transformer_blocks.8/attn/Concat_7_16_8.out18_1_8_bfp.out19_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 572416 }, "/transformer_blocks.8/attn/Concat_8_3d.out18_1_8_bfp.out23_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 573952 }, "/transformer_blocks.8/attn/Reshape_6_output_0.out18_1_8_bfp.out27_0_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 575488 }, "/transformer_blocks.8/attn/to_add_out/Add_output_0.out6_1_17_bfp.out7_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 577024 }, "/transformer_blocks.8/Add_5_output_0.out10_97": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 578560 }, "/transformer_blocks.8/attn/to_out.0/Add_output_0.out6_1_16_bfp.out7_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 581632 }, "/transformer_blocks.8/Add_output_0.out10_94": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 583168 }, "/transformer_blocks.8/Add_1_output_0.out10_95": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 586240 }, "/transformer_blocks.8/norm2/LayerNormalization_output_0.out14_34_bfp.out15_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 589312 }, "/transformer_blocks.8/Add_3_output_0.out0_0_43_bfp.out1_97": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 590848 }, "/transformer_blocks.8/ff/net.0/Mul_5_output_0.out17_2_16_bfp.out25_55": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 592384 }, "/transformer_blocks.8/ff/net.2/Add_output_0.out17_3_57_bfp.out25_57": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 598528 }, "/transformer_blocks.8/norm2_context/LayerNormalization_output_0.out14_35_bfp.out15_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 600064 }, "/transformer_blocks.8/Add_7_output_0.out0_0_44_bfp.out1_99": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 601600 }, "/transformer_blocks.8/ff_context/net.0/Mul_5_output_0.out17_2_17_bfp.out25_56": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 603136 }, "/transformer_blocks.8/ff_context/net.2/Add_output_0.out17_3_58_bfp.out25_58": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 609280 }, "/transformer_blocks.8/Add_8_output_0.out10_98": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 610816 }, "/transformer_blocks.9/norm1_context/norm/LayerNormalization_output_0.out14_36_bfp.out15_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 613888 }, "/transformer_blocks.8/Add_4_output_0.out10_96": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 615424 }, "/Add_8_output_0.out_35_1_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 618496 }, "/transformer_blocks.9/norm1/norm/LayerNormalization_output_0.out14_37_bfp.out15_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 621568 }, "/transformer_blocks.9/norm1/Add_4_output_0.out0_0_46_bfp.out1_101": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 623104 }, "/transformer_blocks.9/attn2/Reshape_1_output_0.out17_0_58_bfp.out21_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 624640 }, "/transformer_blocks.9/attn2/Reshape_output_0.out17_0_59_bfp.out21_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 626176 }, "/transformer_blocks.9/attn2/to_v/Add_output_0.out17_3_61_bfp.out25_59": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 627712 }, "/transformer_blocks.9/attn2/Reshape_3_output_0.out20_9_bfp.out27_0_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 629248 }, "/transformer_blocks.9/attn2/to_out.0/Add_output_0.out17_3_62_bfp.out25_60": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 630784 }, "/transformer_blocks.9/norm1/Add_2_output_0.out0_0_47_bfp.out1_100": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 632320 }, "/transformer_blocks.9/norm1_context/Add_2_output_0.out0_0_45_bfp.out1_103": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 633856 }, "/transformer_blocks.9/attn/Concat_6_output_0_16_9.out18_1_9_bfp.out19_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 635392 }, "/transformer_blocks.9/attn/Concat_7_16_9.out18_1_9_bfp.out19_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 636928 }, "/transformer_blocks.9/attn/Concat_8_3d.out18_1_9_bfp.out23_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 638464 }, "/transformer_blocks.9/attn/Reshape_6_output_0.out18_1_9_bfp.out27_0_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 640000 }, "/transformer_blocks.9/attn/to_add_out/Add_output_0.out6_1_19_bfp.out7_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 641536 }, "/transformer_blocks.9/Add_5_output_0.out10_102": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 643072 }, "/transformer_blocks.9/attn/to_out.0/Add_output_0.out6_1_18_bfp.out7_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 646144 }, "/transformer_blocks.9/Add_output_0.out10_99": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 647680 }, "/transformer_blocks.9/Add_1_output_0.out10_100": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 650752 }, "/transformer_blocks.9/norm2/LayerNormalization_output_0.out14_38_bfp.out15_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 653824 }, "/transformer_blocks.9/Add_3_output_0.out0_0_48_bfp.out1_102": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 655360 }, "/transformer_blocks.9/ff/net.0/Mul_5_output_0.out17_2_18_bfp.out25_61": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 656896 }, "/transformer_blocks.9/ff/net.2/Add_output_0.out17_3_63_bfp.out25_63": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 663040 }, "/transformer_blocks.9/Add_4_output_0.out10_101": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 664576 }, "/Add_9_output_0.out_35_1_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 667648 }, "/transformer_blocks.10/norm1/norm/LayerNormalization_output_0.out14_40_bfp.out15_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 670720 }, "/transformer_blocks.10/norm1/Add_2_output_0.out0_0_51_bfp.out1_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 672256 }, "/transformer_blocks.10/norm1/Add_4_output_0.out0_0_50_bfp.out1_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 673792 }, "/transformer_blocks.9/norm2_context/LayerNormalization_output_0.out14_39_bfp.out15_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 675328 }, "/transformer_blocks.9/Add_7_output_0.out0_0_49_bfp.out1_104": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 676864 }, "/transformer_blocks.9/ff_context/net.0/Mul_5_output_0.out17_2_19_bfp.out25_62": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 678400 }, "/transformer_blocks.9/ff_context/net.2/Add_output_0.out17_3_64_bfp.out25_64": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 684544 }, "/transformer_blocks.9/Add_8_output_0.out10_103": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 686080 }, "/transformer_blocks.10/norm1_context/norm/LayerNormalization_output_0.out14_41_bfp.out15_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 689152 }, "/transformer_blocks.10/norm1_context/Add_2_output_0.out0_0_52_bfp.out1_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 690688 }, "/transformer_blocks.10/attn/Concat_6_output_0_16_10.out18_1_10_bfp.out19_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 692224 }, "/transformer_blocks.10/attn/Concat_7_16_10.out18_1_10_bfp.out19_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 693760 }, "/transformer_blocks.10/attn/Concat_8_3d.out18_1_10_bfp.out23_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 695296 }, "/transformer_blocks.10/attn/Reshape_6_output_0.out18_1_10_bfp.out27_0_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 696832 }, "/transformer_blocks.10/attn/to_out.0/Add_output_0.out6_1_20_bfp.out7_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 698368 }, "/transformer_blocks.10/Add_output_0.out10_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 699904 }, "/transformer_blocks.10/attn/to_add_out/Add_output_0.out6_1_21_bfp.out7_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 702976 }, "/transformer_blocks.10/attn2/Reshape_1_output_0.out17_0_64_bfp.out21_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 704512 }, "/transformer_blocks.10/Add_5_output_0.out10_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 706048 }, "/transformer_blocks.10/norm2_context/LayerNormalization_output_0.out14_43_bfp.out15_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 709120 }, "/transformer_blocks.10/Add_7_output_0.out0_0_54_bfp.out1_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 710656 }, "/transformer_blocks.10/ff_context/net.0/Mul_5_output_0.out17_2_21_bfp.out25_68": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 712192 }, "/transformer_blocks.10/ff_context/net.2/Add_output_0.out17_3_70_bfp.out25_70": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 718336 }, "/transformer_blocks.10/Add_8_output_0.out10_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 719872 }, "/transformer_blocks.11/norm1_context/norm/LayerNormalization_output_0.out14_44_bfp.out15_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 722944 }, "/transformer_blocks.10/attn2/Reshape_output_0.out17_0_65_bfp.out21_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 724480 }, "/transformer_blocks.10/attn2/to_v/Add_output_0.out17_3_67_bfp.out25_65": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 726016 }, "/transformer_blocks.10/attn2/Reshape_3_output_0.out20_10_bfp.out27_0_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 727552 }, "/transformer_blocks.10/attn2/to_out.0/Add_output_0.out17_3_68_bfp.out25_66": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 729088 }, "/transformer_blocks.10/Add_1_output_0.out10_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 730624 }, "/transformer_blocks.10/norm2/LayerNormalization_output_0.out14_42_bfp.out15_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 733696 }, "/transformer_blocks.10/Add_3_output_0.out0_0_53_bfp.out1_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 735232 }, "/transformer_blocks.10/ff/net.0/Mul_5_output_0.out17_2_20_bfp.out25_67": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 736768 }, "/transformer_blocks.10/ff/net.2/Add_output_0.out17_3_69_bfp.out25_69": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 742912 }, "/transformer_blocks.10/Add_4_output_0.out10_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 744448 }, "/Add_10_output_0.out_35_1_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 747520 }, "/transformer_blocks.11/norm1/norm/LayerNormalization_output_0.out14_45_bfp.out15_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 750592 }, "/transformer_blocks.11/norm1/Add_4_output_0.out0_0_56_bfp.out1_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 752128 }, "/transformer_blocks.11/norm1/Add_2_output_0.out0_0_57_bfp.out1_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 753664 }, "/transformer_blocks.11/norm1_context/Add_2_output_0.out0_0_55_bfp.out1_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 755200 }, "/transformer_blocks.11/attn/Concat_6_output_0_16_11.out18_1_11_bfp.out19_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 756736 }, "/transformer_blocks.11/attn/Concat_7_16_11.out18_1_11_bfp.out19_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 758272 }, "/transformer_blocks.11/attn/Concat_8_3d.out18_1_11_bfp.out23_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 759808 }, "/transformer_blocks.11/attn/Reshape_6_output_0.out18_1_11_bfp.out27_0_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 761344 }, "/transformer_blocks.11/attn/to_out.0/Add_output_0.out6_1_22_bfp.out7_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 762880 }, "/transformer_blocks.11/Add_output_0.out10_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 764416 }, "/transformer_blocks.11/attn/to_add_out/Add_output_0.out6_1_23_bfp.out7_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 767488 }, "/transformer_blocks.11/attn2/Reshape_1_output_0.out17_0_70_bfp.out21_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 769024 }, "/transformer_blocks.11/Add_5_output_0.out10_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 770560 }, "/transformer_blocks.11/norm2_context/LayerNormalization_output_0.out14_47_bfp.out15_47": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 773632 }, "/transformer_blocks.11/Add_7_output_0.out0_0_59_bfp.out1_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 775168 }, "/transformer_blocks.11/ff_context/net.0/Mul_5_output_0.out17_2_23_bfp.out25_74": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 776704 }, "/transformer_blocks.11/ff_context/net.2/Add_output_0.out17_3_76_bfp.out25_76": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 782848 }, "/transformer_blocks.11/Add_8_output_0.out10_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 784384 }, "/transformer_blocks.12/norm1_context/norm/LayerNormalization_output_0.out14_48_bfp.out15_48": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 787456 }, "/transformer_blocks.11/attn2/Reshape_output_0.out17_0_71_bfp.out21_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 788992 }, "/transformer_blocks.11/attn2/to_v/Add_output_0.out17_3_73_bfp.out25_71": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 790528 }, "/transformer_blocks.11/attn2/Reshape_3_output_0.out20_11_bfp.out27_0_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 792064 }, "/transformer_blocks.11/attn2/to_out.0/Add_output_0.out17_3_74_bfp.out25_72": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 793600 }, "/transformer_blocks.11/Add_1_output_0.out10_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 795136 }, "/transformer_blocks.11/norm2/LayerNormalization_output_0.out14_46_bfp.out15_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 798208 }, "/transformer_blocks.11/Add_3_output_0.out0_0_58_bfp.out1_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 799744 }, "/transformer_blocks.11/ff/net.0/Mul_5_output_0.out17_2_22_bfp.out25_73": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 801280 }, "/transformer_blocks.11/ff/net.2/Add_output_0.out17_3_75_bfp.out25_75": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 807424 }, "/transformer_blocks.11/Add_4_output_0.out10_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 808960 }, "/Add_11_output_0.out_35_1_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 812032 }, "/transformer_blocks.12/norm1/norm/LayerNormalization_output_0.out14_49_bfp.out15_49": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 815104 }, "/transformer_blocks.12/norm1/Add_4_output_0.out0_0_61_bfp.out1_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 816640 }, "/transformer_blocks.12/attn2/Reshape_1_output_0.out17_0_76_bfp.out21_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 818176 }, "/transformer_blocks.12/norm1/Add_2_output_0.out0_0_62_bfp.out1_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 819712 }, "/transformer_blocks.12/norm1_context/Add_2_output_0.out0_0_60_bfp.out1_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 821248 }, "/transformer_blocks.12/attn/Concat_6_output_0_16_12.out18_1_12_bfp.out19_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 822784 }, "/transformer_blocks.12/attn/Concat_7_16_12.out18_1_12_bfp.out19_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 824320 }, "/transformer_blocks.12/attn/Concat_8_3d.out18_1_12_bfp.out23_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 825856 }, "/transformer_blocks.12/attn/Reshape_6_output_0.out18_1_12_bfp.out27_0_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 827392 }, "/transformer_blocks.12/attn/to_out.0/Add_output_0.out6_1_24_bfp.out7_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 828928 }, "/transformer_blocks.12/Add_output_0.out10_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 830464 }, "/transformer_blocks.12/attn/to_add_out/Add_output_0.out6_1_25_bfp.out7_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 833536 }, "/transformer_blocks.12/Add_5_output_0.out10_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 835072 }, "/transformer_blocks.12/norm2_context/LayerNormalization_output_0.out14_51_bfp.out15_51": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 838144 }, "/transformer_blocks.12/Add_7_output_0.out0_0_64_bfp.out1_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 839680 }, "/transformer_blocks.12/ff_context/net.0/Mul_5_output_0.out17_2_25_bfp.out25_80": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 841216 }, "/transformer_blocks.12/ff_context/net.2/Add_output_0.out17_3_82_bfp.out25_82": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 847360 }, "/transformer_blocks.12/Add_8_output_0.out10_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 848896 }, "/transformer_blocks.13/norm1_context/norm/LayerNormalization_output_0.out14_52_bfp.out15_52": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 851968 }, "/transformer_blocks.12/attn2/Reshape_output_0.out17_0_77_bfp.out21_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 853504 }, "/transformer_blocks.12/attn2/to_v/Add_output_0.out17_3_79_bfp.out25_77": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1", "False" ], "offset": 855040 }, "/transformer_blocks.12/attn2/Reshape_3_output_0.out20_12_bfp.out27_0_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 856576 }, "/transformer_blocks.12/attn2/to_out.0/Add_output_0.out17_3_80_bfp.out25_78": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 858112 }, "/transformer_blocks.12/Add_1_output_0.out10_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 859648 }, "/transformer_blocks.12/norm2/LayerNormalization_output_0.out14_50_bfp.out15_50": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 862720 }, "/transformer_blocks.12/Add_3_output_0.out0_0_63_bfp.out1_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 864256 }, "/transformer_blocks.12/ff/net.0/Mul_5_output_0.out17_2_24_bfp.out25_79": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 865792 }, "/transformer_blocks.12/ff/net.2/Add_output_0.out17_3_81_bfp.out25_81": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 871936 }, "/transformer_blocks.12/Add_4_output_0.out10_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 873472 }, "/Add_12_output_0.out_35_1_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 876544 }, "/transformer_blocks.13/norm1/norm/LayerNormalization_output_0.out14_53_bfp.out15_53": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 879616 }, "/transformer_blocks.13/norm1/Add_2_output_0.out0_0_66_bfp.out1_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 881152 }, "/transformer_blocks.13/norm1_context/Add_2_output_0.out0_0_65_bfp.out1_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 882688 }, "/transformer_blocks.13/attn/Concat_6_output_0_16_13.out18_1_13_bfp.out19_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 884224 }, "/transformer_blocks.13/attn/Concat_7_16_13.out18_1_13_bfp.out19_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 885760 }, "/transformer_blocks.13/attn/Concat_8_3d.out18_1_13_bfp.out23_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 887296 }, "/transformer_blocks.13/attn/Reshape_6_output_0.out18_1_13_bfp.out27_0_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 888832 }, "/transformer_blocks.13/attn/to_out.0/Add_output_0.out6_1_26_bfp.out7_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 890368 }, "/transformer_blocks.13/Add_output_0.out10_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 891904 }, "/transformer_blocks.13/norm2/LayerNormalization_output_0.out14_54_bfp.out15_54": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 894976 }, "/transformer_blocks.13/Add_2_output_0.out0_0_67_bfp.out1_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 896512 }, "/transformer_blocks.13/ff/net.0/Mul_5_output_0.out17_2_26_bfp.out25_83": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 898048 }, "/transformer_blocks.13/ff/net.2/Add_output_0.out17_3_85_bfp.out25_85": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 904192 }, "/transformer_blocks.13/Add_3_output_0.out10_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 905728 }, "/Add_13_output_0.out_35_1_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 908800 }, "/transformer_blocks.13/attn/to_add_out/Add_output_0.out6_1_27_bfp.out7_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 911872 }, "/transformer_blocks.13/Add_4_output_0.out10_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 913408 }, "/transformer_blocks.13/norm2_context/LayerNormalization_output_0.out14_55_bfp.out15_55": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 916480 }, "/transformer_blocks.13/Add_6_output_0.out0_0_68_bfp.out1_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 918016 }, "/transformer_blocks.13/ff_context/net.0/Mul_5_output_0.out17_2_27_bfp.out25_84": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 919552 }, "/transformer_blocks.13/ff_context/net.2/Add_output_0.out17_3_86_bfp.out25_86": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 925696 }, "/transformer_blocks.13/Add_7_output_0.out10_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 927232 }, "/transformer_blocks.14/norm1/norm/LayerNormalization_output_0.out14_57_bfp.out15_57": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 930304 }, "/transformer_blocks.14/norm1/Add_2_output_0.out0_0_70_bfp.out1_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 931840 }, "/transformer_blocks.14/norm1_context/norm/LayerNormalization_output_0.out14_56_bfp.out15_56": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 933376 }, "/transformer_blocks.14/norm1_context/Add_2_output_0.out0_0_69_bfp.out1_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 934912 }, "/transformer_blocks.14/attn/Concat_6_output_0_16_14.out18_1_14_bfp.out19_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 936448 }, "/transformer_blocks.14/attn/Concat_7_16_14.out18_1_14_bfp.out19_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 937984 }, "/transformer_blocks.14/attn/Concat_8_3d.out18_1_14_bfp.out23_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 939520 }, "/transformer_blocks.14/attn/Reshape_6_output_0.out18_1_14_bfp.out27_0_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 941056 }, "/transformer_blocks.14/attn/to_out.0/Add_output_0.out6_1_28_bfp.out7_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 942592 }, "/transformer_blocks.14/Add_output_0.out10_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 944128 }, "/transformer_blocks.14/norm2/LayerNormalization_output_0.out14_58_bfp.out15_58": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 947200 }, "/transformer_blocks.14/Add_2_output_0.out0_0_71_bfp.out1_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 948736 }, "/transformer_blocks.14/ff/net.0/Mul_5_output_0.out17_2_28_bfp.out25_87": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 950272 }, "/transformer_blocks.14/ff/net.2/Add_output_0.out17_3_89_bfp.out25_89": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 956416 }, "/transformer_blocks.14/Add_3_output_0.out10_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 957952 }, "/transformer_blocks.14/attn/to_add_out/Add_output_0.out6_1_29_bfp.out7_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 961024 }, "/transformer_blocks.14/Add_4_output_0.out10_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 962560 }, "/transformer_blocks.14/norm2_context/LayerNormalization_output_0.out14_59_bfp.out15_59": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 965632 }, "/transformer_blocks.14/Add_6_output_0.out0_0_72_bfp.out1_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 967168 }, "/transformer_blocks.14/ff_context/net.0/Mul_5_output_0.out17_2_29_bfp.out25_88": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 968704 }, "/transformer_blocks.14/ff_context/net.2/Add_output_0.out17_3_90_bfp.out25_90": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 974848 }, "/transformer_blocks.14/Add_7_output_0.out10_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 976384 }, "/transformer_blocks.15/norm1_context/norm/LayerNormalization_output_0.out14_60_bfp.out15_60": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 979456 }, "/Add_14_output_0.out_35_1_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 980992 }, "/transformer_blocks.15/norm1/norm/LayerNormalization_output_0.out14_61_bfp.out15_61": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 984064 }, "/transformer_blocks.15/norm1/Add_2_output_0.out0_0_74_bfp.out1_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 985600 }, "/transformer_blocks.15/norm1_context/Add_2_output_0.out0_0_73_bfp.out1_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 987136 }, "/transformer_blocks.15/attn/Concat_6_output_0_16_15.out18_1_15_bfp.out19_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 988672 }, "/transformer_blocks.15/attn/Concat_7_16_15.out18_1_15_bfp.out19_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 990208 }, "/transformer_blocks.15/attn/Concat_8_3d.out18_1_15_bfp.out23_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 991744 }, "/transformer_blocks.15/attn/Reshape_6_output_0.out18_1_15_bfp.out27_0_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 993280 }, "/transformer_blocks.15/attn/to_out.0/Add_output_0.out6_1_30_bfp.out7_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 994816 }, "/transformer_blocks.15/Add_output_0.out10_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 996352 }, "/transformer_blocks.15/norm2/LayerNormalization_output_0.out14_62_bfp.out15_62": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 999424 }, "/transformer_blocks.15/Add_2_output_0.out0_0_75_bfp.out1_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1000960 }, "/transformer_blocks.15/ff/net.0/Mul_5_output_0.out17_2_30_bfp.out25_91": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1002496 }, "/transformer_blocks.15/ff/net.2/Add_output_0.out17_3_93_bfp.out25_93": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1008640 }, "/transformer_blocks.15/Add_3_output_0.out10_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1010176 }, "/Add_15_output_0.out_35_1_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1013248 }, "/transformer_blocks.15/attn/to_add_out/Add_output_0.out6_1_31_bfp.out7_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1016320 }, "/transformer_blocks.15/Add_4_output_0.out10_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1017856 }, "/transformer_blocks.15/norm2_context/LayerNormalization_output_0.out14_63_bfp.out15_63": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1020928 }, "/transformer_blocks.15/Add_6_output_0.out0_0_76_bfp.out1_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1022464 }, "/transformer_blocks.15/ff_context/net.0/Mul_5_output_0.out17_2_31_bfp.out25_92": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1024000 }, "/transformer_blocks.15/ff_context/net.2/Add_output_0.out17_3_94_bfp.out25_94": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1030144 }, "/transformer_blocks.15/Add_7_output_0.out10_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1031680 }, "/transformer_blocks.16/norm1/norm/LayerNormalization_output_0.out14_65_bfp.out15_65": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1034752 }, "/transformer_blocks.16/norm1/Add_2_output_0.out0_0_78_bfp.out1_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1036288 }, "/transformer_blocks.16/norm1_context/norm/LayerNormalization_output_0.out14_64_bfp.out15_64": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1037824 }, "/transformer_blocks.16/norm1_context/Add_2_output_0.out0_0_77_bfp.out1_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1039360 }, "/transformer_blocks.16/attn/Concat_6_output_0_16_16.out18_1_16_bfp.out19_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 1040896 }, "/transformer_blocks.16/attn/Concat_7_16_16.out18_1_16_bfp.out19_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 1042432 }, "/transformer_blocks.16/attn/Concat_8_3d.out18_1_16_bfp.out23_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 1043968 }, "/transformer_blocks.16/attn/Reshape_6_output_0.out18_1_16_bfp.out27_0_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 1045504 }, "/transformer_blocks.16/attn/to_out.0/Add_output_0.out6_1_32_bfp.out7_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1047040 }, "/transformer_blocks.16/Add_output_0.out10_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1048576 }, "/transformer_blocks.16/norm2/LayerNormalization_output_0.out14_66_bfp.out15_66": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1051648 }, "/transformer_blocks.16/Add_2_output_0.out0_0_79_bfp.out1_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1053184 }, "/transformer_blocks.16/ff/net.0/Mul_5_output_0.out17_2_32_bfp.out25_95": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1054720 }, "/transformer_blocks.16/ff/net.2/Add_output_0.out17_3_97_bfp.out25_97": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1060864 }, "/transformer_blocks.16/Add_3_output_0.out10_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1062400 }, "/transformer_blocks.16/attn/to_add_out/Add_output_0.out6_1_33_bfp.out7_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1065472 }, "/transformer_blocks.16/Add_4_output_0.out10_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1067008 }, "/transformer_blocks.16/norm2_context/LayerNormalization_output_0.out14_67_bfp.out15_67": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1070080 }, "/transformer_blocks.16/Add_6_output_0.out0_0_80_bfp.out1_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1071616 }, "/transformer_blocks.16/ff_context/net.0/Mul_5_output_0.out17_2_33_bfp.out25_96": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1073152 }, "/transformer_blocks.16/ff_context/net.2/Add_output_0.out17_3_98_bfp.out25_98": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1079296 }, "/transformer_blocks.16/Add_7_output_0.out10_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1080832 }, "/transformer_blocks.17/norm1_context/norm/LayerNormalization_output_0.out14_68_bfp.out15_68": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1083904 }, "/Add_16_output_0.out_35_1_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1085440 }, "/transformer_blocks.17/norm1/norm/LayerNormalization_output_0.out14_69_bfp.out15_69": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1088512 }, "/transformer_blocks.17/norm1/Add_2_output_0.out0_0_82_bfp.out1_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1090048 }, "/transformer_blocks.17/norm1_context/Add_2_output_0.out0_0_81_bfp.out1_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1091584 }, "/transformer_blocks.17/attn/Concat_6_output_0_16_17.out18_1_17_bfp.out19_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 1093120 }, "/transformer_blocks.17/attn/Concat_7_16_17.out18_1_17_bfp.out19_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 1094656 }, "/transformer_blocks.17/attn/Concat_8_3d.out18_1_17_bfp.out23_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 1096192 }, "/transformer_blocks.17/attn/Reshape_6_output_0.out18_1_17_bfp.out27_0_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 1097728 }, "/transformer_blocks.17/attn/to_out.0/Add_output_0.out6_1_34_bfp.out7_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1099264 }, "/transformer_blocks.17/Add_output_0.out10_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1100800 }, "/transformer_blocks.17/norm2/LayerNormalization_output_0.out14_70_bfp.out15_70": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1103872 }, "/transformer_blocks.17/Add_2_output_0.out0_0_83_bfp.out1_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1105408 }, "/transformer_blocks.17/ff/net.0/Mul_5_output_0.out17_2_34_bfp.out25_99": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1106944 }, "/transformer_blocks.17/ff/net.2/Add_output_0.out17_3_101_bfp.out25_101": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1113088 }, "/transformer_blocks.17/Add_3_output_0.out10_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1114624 }, "/Add_17_output_0.out_35_1_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1117696 }, "/transformer_blocks.17/attn/to_add_out/Add_output_0.out6_1_35_bfp.out7_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1120768 }, "/transformer_blocks.17/Add_4_output_0.out10_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1122304 }, "/transformer_blocks.17/norm2_context/LayerNormalization_output_0.out14_71_bfp.out15_71": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1125376 }, "/transformer_blocks.17/Add_6_output_0.out0_0_84_bfp.out1_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1126912 }, "/transformer_blocks.17/ff_context/net.0/Mul_5_output_0.out17_2_35_bfp.out25_100": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1128448 }, "/transformer_blocks.17/ff_context/net.2/Add_output_0.out17_3_102_bfp.out25_102": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1134592 }, "/transformer_blocks.17/Add_7_output_0.out10_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1136128 }, "/transformer_blocks.18/norm1/norm/LayerNormalization_output_0.out14_73_bfp.out15_73": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1139200 }, "/transformer_blocks.18/norm1/Add_2_output_0.out0_0_86_bfp.out1_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1140736 }, "/transformer_blocks.18/norm1_context/norm/LayerNormalization_output_0.out14_72_bfp.out15_72": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1142272 }, "/transformer_blocks.18/norm1_context/Add_2_output_0.out0_0_85_bfp.out1_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1143808 }, "/transformer_blocks.18/attn/Concat_6_output_0_16_18.out18_1_18_bfp.out19_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 1145344 }, "/transformer_blocks.18/attn/Concat_7_16_18.out18_1_18_bfp.out19_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 1146880 }, "/transformer_blocks.18/attn/Concat_8_3d.out18_1_18_bfp.out23_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 1148416 }, "/transformer_blocks.18/attn/Reshape_6_output_0.out18_1_18_bfp.out27_0_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 1149952 }, "/transformer_blocks.18/attn/to_out.0/Add_output_0.out6_1_36_bfp.out7_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1151488 }, "/transformer_blocks.18/Add_output_0.out10_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1153024 }, "/transformer_blocks.18/norm2/LayerNormalization_output_0.out14_74_bfp.out15_74": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1156096 }, "/transformer_blocks.18/Add_2_output_0.out0_0_87_bfp.out1_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1157632 }, "/transformer_blocks.18/ff/net.0/Mul_5_output_0.out17_2_36_bfp.out25_103": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1159168 }, "/transformer_blocks.18/ff/net.2/Add_output_0.out17_3_105_bfp.out25_105": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1165312 }, "/transformer_blocks.18/Add_3_output_0.out10_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1166848 }, "/transformer_blocks.18/attn/to_add_out/Add_output_0.out6_1_37_bfp.out7_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1169920 }, "/transformer_blocks.18/Add_4_output_0.out10_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1171456 }, "/transformer_blocks.18/norm2_context/LayerNormalization_output_0.out14_75_bfp.out15_75": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1174528 }, "/transformer_blocks.18/Add_6_output_0.out0_0_88_bfp.out1_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1176064 }, "/transformer_blocks.18/ff_context/net.0/Mul_5_output_0.out17_2_37_bfp.out25_104": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1177600 }, "/transformer_blocks.18/ff_context/net.2/Add_output_0.out17_3_106_bfp.out25_106": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1183744 }, "/transformer_blocks.18/Add_7_output_0.out10_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1185280 }, "/transformer_blocks.19/norm1_context/norm/LayerNormalization_output_0.out14_76_bfp.out15_76": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1188352 }, "/Add_18_output_0.out_35_1_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1189888 }, "/transformer_blocks.19/norm1/norm/LayerNormalization_output_0.out14_77_bfp.out15_77": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1192960 }, "/transformer_blocks.19/norm1/Add_2_output_0.out0_0_90_bfp.out1_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1194496 }, "/transformer_blocks.19/norm1_context/Add_2_output_0.out0_0_89_bfp.out1_48": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1196032 }, "/transformer_blocks.19/attn/Concat_6_output_0_16_19.out18_1_19_bfp.out19_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 1197568 }, "/transformer_blocks.19/attn/Concat_7_16_19.out18_1_19_bfp.out19_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 1199104 }, "/transformer_blocks.19/attn/Concat_8_3d.out18_1_19_bfp.out23_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 1200640 }, "/transformer_blocks.19/attn/Reshape_6_output_0.out18_1_19_bfp.out27_0_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 1202176 }, "/transformer_blocks.19/attn/to_out.0/Add_output_0.out6_1_38_bfp.out7_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1203712 }, "/transformer_blocks.19/Add_output_0.out10_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1205248 }, "/transformer_blocks.19/norm2/LayerNormalization_output_0.out14_78_bfp.out15_78": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1208320 }, "/transformer_blocks.19/Add_2_output_0.out0_0_91_bfp.out1_47": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1209856 }, "/transformer_blocks.19/ff/net.0/Mul_5_output_0.out17_2_38_bfp.out25_107": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1211392 }, "/transformer_blocks.19/ff/net.2/Add_output_0.out17_3_109_bfp.out25_109": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1217536 }, "/transformer_blocks.19/Add_3_output_0.out10_47": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1219072 }, "/Add_19_output_0.out_35_1_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1222144 }, "/transformer_blocks.19/attn/to_add_out/Add_output_0.out6_1_39_bfp.out7_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1225216 }, "/transformer_blocks.19/Add_4_output_0.out10_48": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1226752 }, "/transformer_blocks.19/norm2_context/LayerNormalization_output_0.out14_79_bfp.out15_79": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1229824 }, "/transformer_blocks.19/Add_6_output_0.out0_0_92_bfp.out1_49": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1231360 }, "/transformer_blocks.19/ff_context/net.0/Mul_5_output_0.out17_2_39_bfp.out25_108": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1232896 }, "/transformer_blocks.19/ff_context/net.2/Add_output_0.out17_3_110_bfp.out25_110": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1239040 }, "/transformer_blocks.19/Add_7_output_0.out10_49": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1240576 }, "/transformer_blocks.20/norm1/norm/LayerNormalization_output_0.out14_81_bfp.out15_81": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1243648 }, "/transformer_blocks.20/norm1_context/norm/LayerNormalization_output_0.out14_80_bfp.out15_80": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1245184 }, "/transformer_blocks.20/norm1/Add_2_output_0.out0_0_94_bfp.out1_55": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1246720 }, "/transformer_blocks.20/norm1_context/Add_2_output_0.out0_0_93_bfp.out1_57": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1248256 }, "/transformer_blocks.20/attn/Concat_6_output_0_16_20.out18_1_20_bfp.out19_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 1249792 }, "/transformer_blocks.20/attn/Concat_7_16_20.out18_1_20_bfp.out19_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 1251328 }, "/transformer_blocks.20/attn/Concat_8_3d.out18_1_20_bfp.out23_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 1252864 }, "/transformer_blocks.20/attn/Reshape_6_output_0.out18_1_20_bfp.out27_0_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 1254400 }, "/transformer_blocks.20/attn/to_add_out/Add_output_0.out6_1_41_bfp.out7_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1255936 }, "/transformer_blocks.20/Add_4_output_0.out10_57": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1257472 }, "/transformer_blocks.20/attn/to_out.0/Add_output_0.out6_1_40_bfp.out7_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1260544 }, "/transformer_blocks.20/Add_output_0.out10_55": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1262080 }, "/transformer_blocks.20/norm2/LayerNormalization_output_0.out14_82_bfp.out15_82": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1265152 }, "/transformer_blocks.20/Add_2_output_0.out0_0_95_bfp.out1_56": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1266688 }, "/transformer_blocks.20/ff/net.0/Mul_5_output_0.out17_2_40_bfp.out25_111": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1268224 }, "/transformer_blocks.20/ff/net.2/Add_output_0.out17_3_113_bfp.out25_113": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1274368 }, "/transformer_blocks.20/Add_3_output_0.out10_56": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1275904 }, "/transformer_blocks.20/norm2_context/LayerNormalization_output_0.out14_83_bfp.out15_83": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1278976 }, "/transformer_blocks.20/Add_6_output_0.out0_0_96_bfp.out1_58": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1280512 }, "/transformer_blocks.20/ff_context/net.0/Mul_5_output_0.out17_2_41_bfp.out25_112": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1282048 }, "/transformer_blocks.20/ff_context/net.2/Add_output_0.out17_3_114_bfp.out25_114": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1288192 }, "/transformer_blocks.20/Add_7_output_0.out10_58": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1289728 }, "/transformer_blocks.21/norm1_context/norm/LayerNormalization_output_0.out14_84_bfp.out15_84": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1292800 }, "/Add_20_output_0.out_35_1_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1294336 }, "/transformer_blocks.21/norm1/norm/LayerNormalization_output_0.out14_85_bfp.out15_85": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1297408 }, "/transformer_blocks.21/norm1/Add_2_output_0.out0_0_98_bfp.out1_59": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1298944 }, "/transformer_blocks.21/norm1_context/Add_2_output_0.out0_0_97_bfp.out1_61": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1300480 }, "/transformer_blocks.21/attn/Concat_6_output_0_16_21.out18_1_21_bfp.out19_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 1302016 }, "/transformer_blocks.21/attn/Concat_7_16_21.out18_1_21_bfp.out19_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 1303552 }, "/transformer_blocks.21/attn/Concat_8_3d.out18_1_21_bfp.out23_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 1305088 }, "/transformer_blocks.21/attn/Reshape_6_output_0.out18_1_21_bfp.out27_0_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 1306624 }, "/transformer_blocks.21/attn/to_out.0/Add_output_0.out6_1_42_bfp.out7_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1308160 }, "/transformer_blocks.21/Add_output_0.out10_59": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1309696 }, "/transformer_blocks.21/norm2/LayerNormalization_output_0.out14_86_bfp.out15_86": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1312768 }, "/transformer_blocks.21/Add_2_output_0.out0_0_99_bfp.out1_60": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1314304 }, "/transformer_blocks.21/ff/net.0/Mul_5_output_0.out17_2_42_bfp.out25_115": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1315840 }, "/transformer_blocks.21/ff/net.2/Add_output_0.out17_3_117_bfp.out25_117": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1321984 }, "/transformer_blocks.21/Add_3_output_0.out10_60": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1323520 }, "/Add_21_output_0.out_35_1_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1326592 }, "/transformer_blocks.21/attn/to_add_out/Add_output_0.out6_1_43_bfp.out7_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1329664 }, "/transformer_blocks.21/Add_4_output_0.out10_61": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1331200 }, "/transformer_blocks.21/norm2_context/LayerNormalization_output_0.out14_87_bfp.out15_87": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1334272 }, "/transformer_blocks.21/Add_6_output_0.out0_0_100_bfp.out1_62": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1335808 }, "/transformer_blocks.21/ff_context/net.0/Mul_5_output_0.out17_2_43_bfp.out25_116": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1337344 }, "/transformer_blocks.21/ff_context/net.2/Add_output_0.out17_3_118_bfp.out25_118": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1343488 }, "/transformer_blocks.21/Add_7_output_0.out10_62": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1345024 }, "/transformer_blocks.22/norm1/norm/LayerNormalization_output_0.out14_89_bfp.out15_89": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1348096 }, "/transformer_blocks.22/norm1/Add_2_output_0.out0_0_102_bfp.out1_63": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1349632 }, "/transformer_blocks.22/norm1_context/norm/LayerNormalization_output_0.out14_88_bfp.out15_88": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1351168 }, "/transformer_blocks.22/norm1_context/Add_2_output_0.out0_0_101_bfp.out1_65": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1352704 }, "/transformer_blocks.22/attn/Concat_6_output_0_16_22.out18_1_22_bfp.out19_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 1354240 }, "/transformer_blocks.22/attn/Concat_7_16_22.out18_1_22_bfp.out19_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 1355776 }, "/transformer_blocks.22/attn/Concat_8_3d.out18_1_22_bfp.out23_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 1357312 }, "/transformer_blocks.22/attn/Reshape_6_output_0.out18_1_22_bfp.out27_0_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 1358848 }, "/transformer_blocks.22/attn/to_out.0/Add_output_0.out6_1_44_bfp.out7_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1360384 }, "/transformer_blocks.22/Add_output_0.out10_63": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1361920 }, "/transformer_blocks.22/norm2/LayerNormalization_output_0.out14_90_bfp.out15_90": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1364992 }, "/transformer_blocks.22/Add_2_output_0.out0_0_103_bfp.out1_64": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1366528 }, "/transformer_blocks.22/ff/net.0/Mul_5_output_0.out17_2_44_bfp.out25_119": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1368064 }, "/transformer_blocks.22/ff/net.2/Add_output_0.out17_3_121_bfp.out25_121": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1374208 }, "/transformer_blocks.22/Add_3_output_0.out10_64": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1375744 }, "/transformer_blocks.22/attn/to_add_out/Add_output_0.out6_1_45_bfp.out7_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1378816 }, "/transformer_blocks.22/Add_4_output_0.out10_65": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1380352 }, "/transformer_blocks.22/norm2_context/LayerNormalization_output_0.out14_91_bfp.out15_91": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1383424 }, "/transformer_blocks.22/Add_6_output_0.out0_0_104_bfp.out1_66": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1384960 }, "/transformer_blocks.22/ff_context/net.0/Mul_5_output_0.out17_2_45_bfp.out25_120": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1386496 }, "/transformer_blocks.22/ff_context/net.2/Add_output_0.out17_3_122_bfp.out25_122": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1392640 }, "/transformer_blocks.22/Add_7_output_0.out10_66": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1394176 }, "/transformer_blocks.23/norm1_context/norm/LayerNormalization_output_0.out14_92_bfp.out15_92": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1397248 }, "/Add_22_output_0.out_35_1_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1398784 }, "/transformer_blocks.23/norm1/norm/LayerNormalization_output_0.out14_93_bfp.out15_93": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1401856 }, "/transformer_blocks.23/norm1/Add_2_output_0.out0_0_106_bfp.out1_67": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1403392 }, "/transformer_blocks.23/norm1_context/Add_2_output_0.out0_0_105_bfp.out1_69": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length", "False" ], "offset": 1404928 }, "/transformer_blocks.23/attn/Concat_6_output_0_16_23.out18_1_23_bfp.out19_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 1406464 }, "/transformer_blocks.23/attn/Concat_7_16_23.out18_1_23_bfp.out19_47": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1 + max_length", "False" ], "offset": 1408000 }, "/transformer_blocks.23/attn/Concat_8_3d.out18_1_23_bfp.out23_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "False", "state_dim1+max_length", "False" ], "offset": 1409536 }, "/transformer_blocks.23/attn/Reshape_6_output_0.out18_1_23_bfp.out27_0_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "max_length + state_dim1", "False" ], "offset": 1411072 }, "/transformer_blocks.23/attn/to_out.0/Add_output_0.out6_1_46_bfp.out7_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1412608 }, "/transformer_blocks.23/Add_output_0.out10_67": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1414144 }, "/transformer_blocks.23/norm2/LayerNormalization_output_0.out14_94_bfp.out15_94": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1417216 }, "/transformer_blocks.23/Add_2_output_0.out0_0_107_bfp.out1_68": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1418752 }, "/transformer_blocks.23/ff/net.0/Mul_5_output_0.out17_2_46_bfp.out25_123": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1420288 }, "/transformer_blocks.23/ff/net.2/Add_output_0.out17_3_125_bfp.out25_124": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1426432 }, "/transformer_blocks.23/Add_3_output_0.out10_68": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1427968 }, "/norm_out/norm/LayerNormalization_output_0.out14_95_bfp.out15_95": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1431040 }, "/norm_out/Add_2_output_0.out0_0_108_bfp.out1_105": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1432576 }, "/norm_out/Add_2_output_0.out0_0_108": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_2", "state_dim1", "False" ], "offset": 1434112 }, "existing_model.pos_embed.proj.weight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 135168 ], "size_in_bytes": 135168, "op_tensor_size": 135168, "offset": 0, "file_name": "cache/pos_embedprojConv_0.const", "file_size": 135168 }, "existing_model.time_text_embed.timestep_embedder.linear_1.weight_5_1_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 448512 ], "size_in_bytes": 448512, "op_tensor_size": 448512, "offset": 135168, "file_name": "cache/pos_embedprojConv_1.const", "file_size": 448512 }, "/time_text_embed/timestep_embedder/act/Sigmoid.weights2_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 128 ], "size_in_bytes": 256, "op_tensor_size": 256, "offset": 583680, "file_name": "cache/pos_embedprojConv_2.const", "file_size": 256 }, "existing_model.time_text_embed.timestep_embedder.linear_2.weight_5_1_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 583936, "file_name": "cache/pos_embedprojConv_3.const", "file_size": 2691072 }, "existing_model.time_text_embed.text_embedder.linear_1.weight_5_1_2": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 3588096 ], "size_in_bytes": 3588096, "op_tensor_size": 3588096, "offset": 3275008, "file_name": "cache/pos_embedprojConv_4.const", "file_size": 3588096 }, "/time_text_embed/text_embedder/act_1/Sigmoid.weights2_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 128 ], "size_in_bytes": 256, "op_tensor_size": 256, "offset": 6863104, "file_name": "cache/pos_embedprojConv_5.const", "file_size": 256 }, "existing_model.time_text_embed.text_embedder.linear_2.weight_5_1_3": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 6863360, "file_name": "cache/pos_embedprojConv_6.const", "file_size": 2691072 }, "/transformer_blocks.0/norm1/silu/Sigmoid.weights2_2": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 128 ], "size_in_bytes": 256, "op_tensor_size": 256, "offset": 9554432, "file_name": "cache/pos_embedprojConv_7.const", "file_size": 256 }, "encoder_hidden_states.out17_3_0_bfp.wts": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 64 ], "size_in_bytes": 128, "op_tensor_size": 128, "offset": 9554688, "file_name": "cache/pos_embedprojConv_8.const", "file_size": 128 }, "onnx::MatMul_11911": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 7274496 ], "size_in_bytes": 7274496, "op_tensor_size": 7274496, "offset": 9554816, "file_name": "cache/pos_embedprojConv_9.const", "file_size": 7274496 }, "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_bfp.wts": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 64 ], "size_in_bytes": 128, "op_tensor_size": 128, "offset": 16829312, "file_name": "cache/pos_embedprojConv_10.const", "file_size": 128 }, "/transformer_blocks.0/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 16829440, "file_name": "cache/pos_embedprojConv_11.const", "file_size": 3072 }, "/transformer_blocks.0/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 16832512, "file_name": "cache/pos_embedprojConv_12.const", "file_size": 3072 }, "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_0_existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 16835584, "file_name": "cache/pos_embedprojConv_13.const", "file_size": 5382144 }, "/transformer_blocks.0/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 22217728, "file_name": "cache/pos_embedprojConv_14.const", "file_size": 3072 }, "/transformer_blocks.0/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 22220800, "file_name": "cache/pos_embedprojConv_15.const", "file_size": 3072 }, "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_0_existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 22223872, "file_name": "cache/pos_embedprojConv_16.const", "file_size": 5382144 }, "onnx::MatMul_11927_onnx::MatMul_11912": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 27606016, "file_name": "cache/pos_embedprojConv_17.const", "file_size": 5382400 }, "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_3_existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 32988416, "file_name": "cache/pos_embedprojConv_18.const", "file_size": 5382144 }, "onnx::MatMul_11928_onnx::MatMul_11913": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 38370560, "file_name": "cache/pos_embedprojConv_19.const", "file_size": 5382400 }, "onnx::MatMul_11929_onnx::MatMul_11914": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 43752960, "file_name": "cache/pos_embedprojConv_20.const", "file_size": 5382144 }, "onnx::MatMul_11937": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 49135104, "file_name": "cache/pos_embedprojConv_21.const", "file_size": 2691072 }, "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 51826176, "file_name": "cache/pos_embedprojConv_22.const", "file_size": 2691072 }, "/transformer_blocks.0/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 54517248, "file_name": "cache/pos_embedprojConv_23.const", "file_size": 3072 }, "/transformer_blocks.0/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 54520320, "file_name": "cache/pos_embedprojConv_24.const", "file_size": 3072 }, "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_3_existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 54523392, "file_name": "cache/pos_embedprojConv_25.const", "file_size": 5382144 }, "onnx::MatMul_11952": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 59905536, "file_name": "cache/pos_embedprojConv_26.const", "file_size": 10764288 }, "onnx::MatMul_11953": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 70669824, "file_name": "cache/pos_embedprojConv_27.const", "file_size": 10764288 }, "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 81434112, "file_name": "cache/pos_embedprojConv_28.const", "file_size": 2691072 }, "onnx::MatMul_11938": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 84125184, "file_name": "cache/pos_embedprojConv_29.const", "file_size": 2691072 }, "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 86816256, "file_name": "cache/pos_embedprojConv_30.const", "file_size": 2691072 }, "/transformer_blocks.1/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 89507328, "file_name": "cache/pos_embedprojConv_31.const", "file_size": 3072 }, "/transformer_blocks.1/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 89510400, "file_name": "cache/pos_embedprojConv_32.const", "file_size": 3072 }, "onnx::MatMul_11940": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 89513472, "file_name": "cache/pos_embedprojConv_33.const", "file_size": 2691200 }, "onnx::MatMul_11939": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 92204672, "file_name": "cache/pos_embedprojConv_34.const", "file_size": 2691200 }, "onnx::MatMul_11941": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 94895872, "file_name": "cache/pos_embedprojConv_35.const", "file_size": 2691072 }, "onnx::MatMul_11949": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 97586944, "file_name": "cache/pos_embedprojConv_36.const", "file_size": 2691072 }, "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 100278016, "file_name": "cache/pos_embedprojConv_37.const", "file_size": 2691072 }, "/transformer_blocks.0/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 102969088, "file_name": "cache/pos_embedprojConv_38.const", "file_size": 3072 }, "/transformer_blocks.0/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 102972160, "file_name": "cache/pos_embedprojConv_39.const", "file_size": 3072 }, "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_6_existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_7": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 102975232, "file_name": "cache/pos_embedprojConv_40.const", "file_size": 5382144 }, "onnx::MatMul_11950": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 108357376, "file_name": "cache/pos_embedprojConv_41.const", "file_size": 10764288 }, "onnx::MatMul_11951": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 119121664, "file_name": "cache/pos_embedprojConv_42.const", "file_size": 10764288 }, "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_8_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 129885952, "file_name": "cache/pos_embedprojConv_43.const", "file_size": 2691072 }, "/transformer_blocks.1/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 132577024, "file_name": "cache/pos_embedprojConv_44.const", "file_size": 3072 }, "/transformer_blocks.1/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 132580096, "file_name": "cache/pos_embedprojConv_45.const", "file_size": 3072 }, "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_0_existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 132583168, "file_name": "cache/pos_embedprojConv_46.const", "file_size": 5382144 }, "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_3_existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 137965312, "file_name": "cache/pos_embedprojConv_47.const", "file_size": 5382144 }, "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_0_existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 143347456, "file_name": "cache/pos_embedprojConv_48.const", "file_size": 5382144 }, "onnx::MatMul_11969_onnx::MatMul_11954": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 148729600, "file_name": "cache/pos_embedprojConv_49.const", "file_size": 5382400 }, "onnx::MatMul_11970_onnx::MatMul_11955": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 154112000, "file_name": "cache/pos_embedprojConv_50.const", "file_size": 5382400 }, "onnx::MatMul_11971_onnx::MatMul_11956": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 159494400, "file_name": "cache/pos_embedprojConv_51.const", "file_size": 5382144 }, "onnx::MatMul_11980": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 164876544, "file_name": "cache/pos_embedprojConv_52.const", "file_size": 2691072 }, "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 167567616, "file_name": "cache/pos_embedprojConv_53.const", "file_size": 2691072 }, "onnx::MatMul_11979": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 170258688, "file_name": "cache/pos_embedprojConv_54.const", "file_size": 2691072 }, "onnx::MatMul_11982": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 172949760, "file_name": "cache/pos_embedprojConv_55.const", "file_size": 2691200 }, "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 175640960, "file_name": "cache/pos_embedprojConv_56.const", "file_size": 2691072 }, "/transformer_blocks.1/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 178332032, "file_name": "cache/pos_embedprojConv_57.const", "file_size": 3072 }, "/transformer_blocks.1/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 178335104, "file_name": "cache/pos_embedprojConv_58.const", "file_size": 3072 }, "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_3_existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 178338176, "file_name": "cache/pos_embedprojConv_59.const", "file_size": 5382144 }, "onnx::MatMul_11994": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 183720320, "file_name": "cache/pos_embedprojConv_60.const", "file_size": 10764288 }, "onnx::MatMul_11995": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 194484608, "file_name": "cache/pos_embedprojConv_61.const", "file_size": 10764288 }, "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 205248896, "file_name": "cache/pos_embedprojConv_62.const", "file_size": 2691072 }, "/transformer_blocks.2/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 207939968, "file_name": "cache/pos_embedprojConv_63.const", "file_size": 3072 }, "/transformer_blocks.2/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 207943040, "file_name": "cache/pos_embedprojConv_64.const", "file_size": 3072 }, "onnx::MatMul_11981": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 207946112, "file_name": "cache/pos_embedprojConv_65.const", "file_size": 2691200 }, "onnx::MatMul_11983": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 210637312, "file_name": "cache/pos_embedprojConv_66.const", "file_size": 2691072 }, "onnx::MatMul_11991": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 213328384, "file_name": "cache/pos_embedprojConv_67.const", "file_size": 2691072 }, "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 216019456, "file_name": "cache/pos_embedprojConv_68.const", "file_size": 2691072 }, "/transformer_blocks.1/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 218710528, "file_name": "cache/pos_embedprojConv_69.const", "file_size": 3072 }, "/transformer_blocks.1/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 218713600, "file_name": "cache/pos_embedprojConv_70.const", "file_size": 3072 }, "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_6_existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_7": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 218716672, "file_name": "cache/pos_embedprojConv_71.const", "file_size": 5382144 }, "onnx::MatMul_11992": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 224098816, "file_name": "cache/pos_embedprojConv_72.const", "file_size": 10764288 }, "onnx::MatMul_11993": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 234863104, "file_name": "cache/pos_embedprojConv_73.const", "file_size": 10764288 }, "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_8_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 245627392, "file_name": "cache/pos_embedprojConv_74.const", "file_size": 2691072 }, "/transformer_blocks.2/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 248318464, "file_name": "cache/pos_embedprojConv_75.const", "file_size": 3072 }, "/transformer_blocks.2/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 248321536, "file_name": "cache/pos_embedprojConv_76.const", "file_size": 3072 }, "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_3_existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 248324608, "file_name": "cache/pos_embedprojConv_77.const", "file_size": 5382144 }, "onnx::MatMul_12024": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 253706752, "file_name": "cache/pos_embedprojConv_78.const", "file_size": 2691200 }, "onnx::MatMul_12023": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 256397952, "file_name": "cache/pos_embedprojConv_79.const", "file_size": 2691200 }, "onnx::MatMul_12025": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 259089152, "file_name": "cache/pos_embedprojConv_80.const", "file_size": 2691072 }, "onnx::MatMul_12033": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 261780224, "file_name": "cache/pos_embedprojConv_81.const", "file_size": 2691072 }, "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_0_existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 264471296, "file_name": "cache/pos_embedprojConv_82.const", "file_size": 5382144 }, "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_0_existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 269853440, "file_name": "cache/pos_embedprojConv_83.const", "file_size": 5382144 }, "onnx::MatMul_12011_onnx::MatMul_11996": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 275235584, "file_name": "cache/pos_embedprojConv_84.const", "file_size": 5382400 }, "onnx::MatMul_12012_onnx::MatMul_11997": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 280617984, "file_name": "cache/pos_embedprojConv_85.const", "file_size": 5382400 }, "onnx::MatMul_12013_onnx::MatMul_11998": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 286000384, "file_name": "cache/pos_embedprojConv_86.const", "file_size": 5382144 }, "onnx::MatMul_12021": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 291382528, "file_name": "cache/pos_embedprojConv_87.const", "file_size": 2691072 }, "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 294073600, "file_name": "cache/pos_embedprojConv_88.const", "file_size": 2691072 }, "onnx::MatMul_12022": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 296764672, "file_name": "cache/pos_embedprojConv_89.const", "file_size": 2691072 }, "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 299455744, "file_name": "cache/pos_embedprojConv_90.const", "file_size": 2691072 }, "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 302146816, "file_name": "cache/pos_embedprojConv_91.const", "file_size": 2691072 }, "/transformer_blocks.2/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 304837888, "file_name": "cache/pos_embedprojConv_92.const", "file_size": 3072 }, "/transformer_blocks.2/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 304840960, "file_name": "cache/pos_embedprojConv_93.const", "file_size": 3072 }, "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_6_existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_7": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 304844032, "file_name": "cache/pos_embedprojConv_94.const", "file_size": 5382144 }, "onnx::MatMul_12034": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 310226176, "file_name": "cache/pos_embedprojConv_95.const", "file_size": 10764288 }, "onnx::MatMul_12035": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 320990464, "file_name": "cache/pos_embedprojConv_96.const", "file_size": 10764288 }, "/transformer_blocks.2/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 331754752, "file_name": "cache/pos_embedprojConv_97.const", "file_size": 3072 }, "/transformer_blocks.2/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 331757824, "file_name": "cache/pos_embedprojConv_98.const", "file_size": 3072 }, "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_3_existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 331760896, "file_name": "cache/pos_embedprojConv_99.const", "file_size": 5382144 }, "onnx::MatMul_12036": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 337143040, "file_name": "cache/pos_embedprojConv_100.const", "file_size": 10764288 }, "onnx::MatMul_12037": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 347907328, "file_name": "cache/pos_embedprojConv_101.const", "file_size": 10764288 }, "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 358671616, "file_name": "cache/pos_embedprojConv_102.const", "file_size": 2691072 }, "/transformer_blocks.3/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 361362688, "file_name": "cache/pos_embedprojConv_103.const", "file_size": 3072 }, "/transformer_blocks.3/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 361365760, "file_name": "cache/pos_embedprojConv_104.const", "file_size": 3072 }, "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_8_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 361368832, "file_name": "cache/pos_embedprojConv_105.const", "file_size": 2691072 }, "/transformer_blocks.3/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 364059904, "file_name": "cache/pos_embedprojConv_106.const", "file_size": 3072 }, "/transformer_blocks.3/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 364062976, "file_name": "cache/pos_embedprojConv_107.const", "file_size": 3072 }, "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_3_existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 364066048, "file_name": "cache/pos_embedprojConv_108.const", "file_size": 5382144 }, "onnx::MatMul_12066": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 369448192, "file_name": "cache/pos_embedprojConv_109.const", "file_size": 2691200 }, "onnx::MatMul_12065": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 372139392, "file_name": "cache/pos_embedprojConv_110.const", "file_size": 2691200 }, "onnx::MatMul_12067": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 374830592, "file_name": "cache/pos_embedprojConv_111.const", "file_size": 2691072 }, "onnx::MatMul_12075": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 377521664, "file_name": "cache/pos_embedprojConv_112.const", "file_size": 2691072 }, "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_0_existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 380212736, "file_name": "cache/pos_embedprojConv_113.const", "file_size": 5382144 }, "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_0_existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 385594880, "file_name": "cache/pos_embedprojConv_114.const", "file_size": 5382144 }, "onnx::MatMul_12053_onnx::MatMul_12038": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 390977024, "file_name": "cache/pos_embedprojConv_115.const", "file_size": 5382400 }, "onnx::MatMul_12054_onnx::MatMul_12039": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 396359424, "file_name": "cache/pos_embedprojConv_116.const", "file_size": 5382400 }, "onnx::MatMul_12055_onnx::MatMul_12040": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 401741824, "file_name": "cache/pos_embedprojConv_117.const", "file_size": 5382144 }, "onnx::MatMul_12063": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 407123968, "file_name": "cache/pos_embedprojConv_118.const", "file_size": 2691072 }, "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 409815040, "file_name": "cache/pos_embedprojConv_119.const", "file_size": 2691072 }, "onnx::MatMul_12064": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 412506112, "file_name": "cache/pos_embedprojConv_120.const", "file_size": 2691072 }, "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 415197184, "file_name": "cache/pos_embedprojConv_121.const", "file_size": 2691072 }, "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 417888256, "file_name": "cache/pos_embedprojConv_122.const", "file_size": 2691072 }, "/transformer_blocks.3/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 420579328, "file_name": "cache/pos_embedprojConv_123.const", "file_size": 3072 }, "/transformer_blocks.3/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 420582400, "file_name": "cache/pos_embedprojConv_124.const", "file_size": 3072 }, "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_6_existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_7": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 420585472, "file_name": "cache/pos_embedprojConv_125.const", "file_size": 5382144 }, "onnx::MatMul_12076": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 425967616, "file_name": "cache/pos_embedprojConv_126.const", "file_size": 10764288 }, "onnx::MatMul_12077": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 436731904, "file_name": "cache/pos_embedprojConv_127.const", "file_size": 10764288 }, "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_8_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 447496192, "file_name": "cache/pos_embedprojConv_128.const", "file_size": 2691072 }, "/transformer_blocks.4/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 450187264, "file_name": "cache/pos_embedprojConv_129.const", "file_size": 3072 }, "/transformer_blocks.4/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 450190336, "file_name": "cache/pos_embedprojConv_130.const", "file_size": 3072 }, "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_3_existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 450193408, "file_name": "cache/pos_embedprojConv_131.const", "file_size": 5382144 }, "/transformer_blocks.3/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 455575552, "file_name": "cache/pos_embedprojConv_132.const", "file_size": 3072 }, "/transformer_blocks.3/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 455578624, "file_name": "cache/pos_embedprojConv_133.const", "file_size": 3072 }, "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_3_existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 455581696, "file_name": "cache/pos_embedprojConv_134.const", "file_size": 5382144 }, "onnx::MatMul_12078": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 460963840, "file_name": "cache/pos_embedprojConv_135.const", "file_size": 10764288 }, "onnx::MatMul_12079": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 471728128, "file_name": "cache/pos_embedprojConv_136.const", "file_size": 10764288 }, "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 482492416, "file_name": "cache/pos_embedprojConv_137.const", "file_size": 2691072 }, "/transformer_blocks.4/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 485183488, "file_name": "cache/pos_embedprojConv_138.const", "file_size": 3072 }, "/transformer_blocks.4/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 485186560, "file_name": "cache/pos_embedprojConv_139.const", "file_size": 3072 }, "onnx::MatMul_12108": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 485189632, "file_name": "cache/pos_embedprojConv_140.const", "file_size": 2691200 }, "onnx::MatMul_12107": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 487880832, "file_name": "cache/pos_embedprojConv_141.const", "file_size": 2691200 }, "onnx::MatMul_12109": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 490572032, "file_name": "cache/pos_embedprojConv_142.const", "file_size": 2691072 }, "onnx::MatMul_12117": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 493263104, "file_name": "cache/pos_embedprojConv_143.const", "file_size": 2691072 }, "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_0_existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 495954176, "file_name": "cache/pos_embedprojConv_144.const", "file_size": 5382144 }, "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_0_existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 501336320, "file_name": "cache/pos_embedprojConv_145.const", "file_size": 5382144 }, "onnx::MatMul_12095_onnx::MatMul_12080": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 506718464, "file_name": "cache/pos_embedprojConv_146.const", "file_size": 5382400 }, "onnx::MatMul_12096_onnx::MatMul_12081": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 512100864, "file_name": "cache/pos_embedprojConv_147.const", "file_size": 5382400 }, "onnx::MatMul_12097_onnx::MatMul_12082": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 517483264, "file_name": "cache/pos_embedprojConv_148.const", "file_size": 5382144 }, "onnx::MatMul_12105": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 522865408, "file_name": "cache/pos_embedprojConv_149.const", "file_size": 2691072 }, "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 525556480, "file_name": "cache/pos_embedprojConv_150.const", "file_size": 2691072 }, "onnx::MatMul_12106": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 528247552, "file_name": "cache/pos_embedprojConv_151.const", "file_size": 2691072 }, "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 530938624, "file_name": "cache/pos_embedprojConv_152.const", "file_size": 2691072 }, "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 533629696, "file_name": "cache/pos_embedprojConv_153.const", "file_size": 2691072 }, "/transformer_blocks.4/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 536320768, "file_name": "cache/pos_embedprojConv_154.const", "file_size": 3072 }, "/transformer_blocks.4/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 536323840, "file_name": "cache/pos_embedprojConv_155.const", "file_size": 3072 }, "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_6_existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_7": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 536326912, "file_name": "cache/pos_embedprojConv_156.const", "file_size": 5382144 }, "onnx::MatMul_12118": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 541709056, "file_name": "cache/pos_embedprojConv_157.const", "file_size": 10764288 }, "onnx::MatMul_12119": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 552473344, "file_name": "cache/pos_embedprojConv_158.const", "file_size": 10764288 }, "/transformer_blocks.4/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 563237632, "file_name": "cache/pos_embedprojConv_159.const", "file_size": 3072 }, "/transformer_blocks.4/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 563240704, "file_name": "cache/pos_embedprojConv_160.const", "file_size": 3072 }, "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_3_existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 563243776, "file_name": "cache/pos_embedprojConv_161.const", "file_size": 5382144 }, "onnx::MatMul_12120": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 568625920, "file_name": "cache/pos_embedprojConv_162.const", "file_size": 10764288 }, "onnx::MatMul_12121": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 579390208, "file_name": "cache/pos_embedprojConv_163.const", "file_size": 10764288 }, "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 590154496, "file_name": "cache/pos_embedprojConv_164.const", "file_size": 2691072 }, "/transformer_blocks.5/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 592845568, "file_name": "cache/pos_embedprojConv_165.const", "file_size": 3072 }, "/transformer_blocks.5/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 592848640, "file_name": "cache/pos_embedprojConv_166.const", "file_size": 3072 }, "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_8_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 592851712, "file_name": "cache/pos_embedprojConv_167.const", "file_size": 2691072 }, "/transformer_blocks.5/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 595542784, "file_name": "cache/pos_embedprojConv_168.const", "file_size": 3072 }, "/transformer_blocks.5/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 595545856, "file_name": "cache/pos_embedprojConv_169.const", "file_size": 3072 }, "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_3_existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 595548928, "file_name": "cache/pos_embedprojConv_170.const", "file_size": 5382144 }, "onnx::MatMul_12150": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 600931072, "file_name": "cache/pos_embedprojConv_171.const", "file_size": 2691200 }, "onnx::MatMul_12149": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 603622272, "file_name": "cache/pos_embedprojConv_172.const", "file_size": 2691200 }, "onnx::MatMul_12151": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 606313472, "file_name": "cache/pos_embedprojConv_173.const", "file_size": 2691072 }, "onnx::MatMul_12159": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 609004544, "file_name": "cache/pos_embedprojConv_174.const", "file_size": 2691072 }, "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_0_existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 611695616, "file_name": "cache/pos_embedprojConv_175.const", "file_size": 5382144 }, "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_0_existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 617077760, "file_name": "cache/pos_embedprojConv_176.const", "file_size": 5382144 }, "onnx::MatMul_12137_onnx::MatMul_12122": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 622459904, "file_name": "cache/pos_embedprojConv_177.const", "file_size": 5382400 }, "onnx::MatMul_12138_onnx::MatMul_12123": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 627842304, "file_name": "cache/pos_embedprojConv_178.const", "file_size": 5382400 }, "onnx::MatMul_12139_onnx::MatMul_12124": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 633224704, "file_name": "cache/pos_embedprojConv_179.const", "file_size": 5382144 }, "onnx::MatMul_12147": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 638606848, "file_name": "cache/pos_embedprojConv_180.const", "file_size": 2691072 }, "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 641297920, "file_name": "cache/pos_embedprojConv_181.const", "file_size": 2691072 }, "onnx::MatMul_12148": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 643988992, "file_name": "cache/pos_embedprojConv_182.const", "file_size": 2691072 }, "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 646680064, "file_name": "cache/pos_embedprojConv_183.const", "file_size": 2691072 }, "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 649371136, "file_name": "cache/pos_embedprojConv_184.const", "file_size": 2691072 }, "/transformer_blocks.5/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 652062208, "file_name": "cache/pos_embedprojConv_185.const", "file_size": 3072 }, "/transformer_blocks.5/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 652065280, "file_name": "cache/pos_embedprojConv_186.const", "file_size": 3072 }, "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_6_existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_7": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 652068352, "file_name": "cache/pos_embedprojConv_187.const", "file_size": 5382144 }, "onnx::MatMul_12160": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 657450496, "file_name": "cache/pos_embedprojConv_188.const", "file_size": 10764288 }, "onnx::MatMul_12161": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 668214784, "file_name": "cache/pos_embedprojConv_189.const", "file_size": 10764288 }, "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_8_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 678979072, "file_name": "cache/pos_embedprojConv_190.const", "file_size": 2691072 }, "/transformer_blocks.6/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 681670144, "file_name": "cache/pos_embedprojConv_191.const", "file_size": 3072 }, "/transformer_blocks.6/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 681673216, "file_name": "cache/pos_embedprojConv_192.const", "file_size": 3072 }, "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_3_existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 681676288, "file_name": "cache/pos_embedprojConv_193.const", "file_size": 5382144 }, "/transformer_blocks.5/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 687058432, "file_name": "cache/pos_embedprojConv_194.const", "file_size": 3072 }, "/transformer_blocks.5/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 687061504, "file_name": "cache/pos_embedprojConv_195.const", "file_size": 3072 }, "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_3_existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 687064576, "file_name": "cache/pos_embedprojConv_196.const", "file_size": 5382144 }, "onnx::MatMul_12162": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 692446720, "file_name": "cache/pos_embedprojConv_197.const", "file_size": 10764288 }, "onnx::MatMul_12163": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 703211008, "file_name": "cache/pos_embedprojConv_198.const", "file_size": 10764288 }, "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 713975296, "file_name": "cache/pos_embedprojConv_199.const", "file_size": 2691072 }, "/transformer_blocks.6/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 716666368, "file_name": "cache/pos_embedprojConv_200.const", "file_size": 3072 }, "/transformer_blocks.6/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 716669440, "file_name": "cache/pos_embedprojConv_201.const", "file_size": 3072 }, "onnx::MatMul_12192": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 716672512, "file_name": "cache/pos_embedprojConv_202.const", "file_size": 2691200 }, "onnx::MatMul_12191": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 719363712, "file_name": "cache/pos_embedprojConv_203.const", "file_size": 2691200 }, "onnx::MatMul_12193": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 722054912, "file_name": "cache/pos_embedprojConv_204.const", "file_size": 2691072 }, "onnx::MatMul_12201": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 724745984, "file_name": "cache/pos_embedprojConv_205.const", "file_size": 2691072 }, "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_0_existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 727437056, "file_name": "cache/pos_embedprojConv_206.const", "file_size": 5382144 }, "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_0_existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 732819200, "file_name": "cache/pos_embedprojConv_207.const", "file_size": 5382144 }, "onnx::MatMul_12179_onnx::MatMul_12164": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 738201344, "file_name": "cache/pos_embedprojConv_208.const", "file_size": 5382400 }, "onnx::MatMul_12180_onnx::MatMul_12165": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 743583744, "file_name": "cache/pos_embedprojConv_209.const", "file_size": 5382400 }, "onnx::MatMul_12181_onnx::MatMul_12166": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 748966144, "file_name": "cache/pos_embedprojConv_210.const", "file_size": 5382144 }, "onnx::MatMul_12189": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 754348288, "file_name": "cache/pos_embedprojConv_211.const", "file_size": 2691072 }, "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 757039360, "file_name": "cache/pos_embedprojConv_212.const", "file_size": 2691072 }, "onnx::MatMul_12190": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 759730432, "file_name": "cache/pos_embedprojConv_213.const", "file_size": 2691072 }, "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 762421504, "file_name": "cache/pos_embedprojConv_214.const", "file_size": 2691072 }, "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 765112576, "file_name": "cache/pos_embedprojConv_215.const", "file_size": 2691072 }, "/transformer_blocks.6/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 767803648, "file_name": "cache/pos_embedprojConv_216.const", "file_size": 3072 }, "/transformer_blocks.6/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 767806720, "file_name": "cache/pos_embedprojConv_217.const", "file_size": 3072 }, "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_6_existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_7": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 767809792, "file_name": "cache/pos_embedprojConv_218.const", "file_size": 5382144 }, "onnx::MatMul_12202": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 773191936, "file_name": "cache/pos_embedprojConv_219.const", "file_size": 10764288 }, "onnx::MatMul_12203": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 783956224, "file_name": "cache/pos_embedprojConv_220.const", "file_size": 10764288 }, "/transformer_blocks.6/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 794720512, "file_name": "cache/pos_embedprojConv_221.const", "file_size": 3072 }, "/transformer_blocks.6/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 794723584, "file_name": "cache/pos_embedprojConv_222.const", "file_size": 3072 }, "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_3_existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 794726656, "file_name": "cache/pos_embedprojConv_223.const", "file_size": 5382144 }, "onnx::MatMul_12204": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 800108800, "file_name": "cache/pos_embedprojConv_224.const", "file_size": 10764288 }, "onnx::MatMul_12205": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 810873088, "file_name": "cache/pos_embedprojConv_225.const", "file_size": 10764288 }, "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 821637376, "file_name": "cache/pos_embedprojConv_226.const", "file_size": 2691072 }, "/transformer_blocks.7/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 824328448, "file_name": "cache/pos_embedprojConv_227.const", "file_size": 3072 }, "/transformer_blocks.7/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 824331520, "file_name": "cache/pos_embedprojConv_228.const", "file_size": 3072 }, "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_8_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 824334592, "file_name": "cache/pos_embedprojConv_229.const", "file_size": 2691072 }, "/transformer_blocks.7/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 827025664, "file_name": "cache/pos_embedprojConv_230.const", "file_size": 3072 }, "/transformer_blocks.7/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 827028736, "file_name": "cache/pos_embedprojConv_231.const", "file_size": 3072 }, "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_3_existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 827031808, "file_name": "cache/pos_embedprojConv_232.const", "file_size": 5382144 }, "onnx::MatMul_12234": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 832413952, "file_name": "cache/pos_embedprojConv_233.const", "file_size": 2691200 }, "onnx::MatMul_12233": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 835105152, "file_name": "cache/pos_embedprojConv_234.const", "file_size": 2691200 }, "onnx::MatMul_12235": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 837796352, "file_name": "cache/pos_embedprojConv_235.const", "file_size": 2691072 }, "onnx::MatMul_12243": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 840487424, "file_name": "cache/pos_embedprojConv_236.const", "file_size": 2691072 }, "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_0_existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 843178496, "file_name": "cache/pos_embedprojConv_237.const", "file_size": 5382144 }, "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_0_existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 848560640, "file_name": "cache/pos_embedprojConv_238.const", "file_size": 5382144 }, "onnx::MatMul_12221_onnx::MatMul_12206": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 853942784, "file_name": "cache/pos_embedprojConv_239.const", "file_size": 5382400 }, "onnx::MatMul_12222_onnx::MatMul_12207": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 859325184, "file_name": "cache/pos_embedprojConv_240.const", "file_size": 5382400 }, "onnx::MatMul_12223_onnx::MatMul_12208": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 864707584, "file_name": "cache/pos_embedprojConv_241.const", "file_size": 5382144 }, "onnx::MatMul_12231": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 870089728, "file_name": "cache/pos_embedprojConv_242.const", "file_size": 2691072 }, "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 872780800, "file_name": "cache/pos_embedprojConv_243.const", "file_size": 2691072 }, "onnx::MatMul_12232": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 875471872, "file_name": "cache/pos_embedprojConv_244.const", "file_size": 2691072 }, "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 878162944, "file_name": "cache/pos_embedprojConv_245.const", "file_size": 2691072 }, "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 880854016, "file_name": "cache/pos_embedprojConv_246.const", "file_size": 2691072 }, "/transformer_blocks.7/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 883545088, "file_name": "cache/pos_embedprojConv_247.const", "file_size": 3072 }, "/transformer_blocks.7/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 883548160, "file_name": "cache/pos_embedprojConv_248.const", "file_size": 3072 }, "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_6_existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_7": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 883551232, "file_name": "cache/pos_embedprojConv_249.const", "file_size": 5382144 }, "onnx::MatMul_12244": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 888933376, "file_name": "cache/pos_embedprojConv_250.const", "file_size": 10764288 }, "onnx::MatMul_12245": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 899697664, "file_name": "cache/pos_embedprojConv_251.const", "file_size": 10764288 }, "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_8_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 910461952, "file_name": "cache/pos_embedprojConv_252.const", "file_size": 2691072 }, "/transformer_blocks.8/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 913153024, "file_name": "cache/pos_embedprojConv_253.const", "file_size": 3072 }, "/transformer_blocks.8/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 913156096, "file_name": "cache/pos_embedprojConv_254.const", "file_size": 3072 }, "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_3_existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 913159168, "file_name": "cache/pos_embedprojConv_255.const", "file_size": 5382144 }, "/transformer_blocks.7/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 918541312, "file_name": "cache/pos_embedprojConv_256.const", "file_size": 3072 }, "/transformer_blocks.7/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 918544384, "file_name": "cache/pos_embedprojConv_257.const", "file_size": 3072 }, "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_3_existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 918547456, "file_name": "cache/pos_embedprojConv_258.const", "file_size": 5382144 }, "onnx::MatMul_12246": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 923929600, "file_name": "cache/pos_embedprojConv_259.const", "file_size": 10764288 }, "onnx::MatMul_12247": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 934693888, "file_name": "cache/pos_embedprojConv_260.const", "file_size": 10764288 }, "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 945458176, "file_name": "cache/pos_embedprojConv_261.const", "file_size": 2691072 }, "/transformer_blocks.8/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 948149248, "file_name": "cache/pos_embedprojConv_262.const", "file_size": 3072 }, "/transformer_blocks.8/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 948152320, "file_name": "cache/pos_embedprojConv_263.const", "file_size": 3072 }, "onnx::MatMul_12276": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 948155392, "file_name": "cache/pos_embedprojConv_264.const", "file_size": 2691200 }, "onnx::MatMul_12275": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 950846592, "file_name": "cache/pos_embedprojConv_265.const", "file_size": 2691200 }, "onnx::MatMul_12277": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 953537792, "file_name": "cache/pos_embedprojConv_266.const", "file_size": 2691072 }, "onnx::MatMul_12285": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 956228864, "file_name": "cache/pos_embedprojConv_267.const", "file_size": 2691072 }, "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_0_existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 958919936, "file_name": "cache/pos_embedprojConv_268.const", "file_size": 5382144 }, "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_0_existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 964302080, "file_name": "cache/pos_embedprojConv_269.const", "file_size": 5382144 }, "onnx::MatMul_12263_onnx::MatMul_12248": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 969684224, "file_name": "cache/pos_embedprojConv_270.const", "file_size": 5382400 }, "onnx::MatMul_12264_onnx::MatMul_12249": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 975066624, "file_name": "cache/pos_embedprojConv_271.const", "file_size": 5382400 }, "onnx::MatMul_12265_onnx::MatMul_12250": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 980449024, "file_name": "cache/pos_embedprojConv_272.const", "file_size": 5382144 }, "onnx::MatMul_12273": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 985831168, "file_name": "cache/pos_embedprojConv_273.const", "file_size": 2691072 }, "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 988522240, "file_name": "cache/pos_embedprojConv_274.const", "file_size": 2691072 }, "onnx::MatMul_12274": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 991213312, "file_name": "cache/pos_embedprojConv_275.const", "file_size": 2691072 }, "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 993904384, "file_name": "cache/pos_embedprojConv_276.const", "file_size": 2691072 }, "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 996595456, "file_name": "cache/pos_embedprojConv_277.const", "file_size": 2691072 }, "/transformer_blocks.8/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 999286528, "file_name": "cache/pos_embedprojConv_278.const", "file_size": 3072 }, "/transformer_blocks.8/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 999289600, "file_name": "cache/pos_embedprojConv_279.const", "file_size": 3072 }, "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_6_existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_7": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 999292672, "file_name": "cache/pos_embedprojConv_280.const", "file_size": 5382144 }, "onnx::MatMul_12286": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1004674816, "file_name": "cache/pos_embedprojConv_281.const", "file_size": 10764288 }, "onnx::MatMul_12287": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1015439104, "file_name": "cache/pos_embedprojConv_282.const", "file_size": 10764288 }, "/transformer_blocks.8/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1026203392, "file_name": "cache/pos_embedprojConv_283.const", "file_size": 3072 }, "/transformer_blocks.8/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1026206464, "file_name": "cache/pos_embedprojConv_284.const", "file_size": 3072 }, "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_3_existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1026209536, "file_name": "cache/pos_embedprojConv_285.const", "file_size": 5382144 }, "onnx::MatMul_12288": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1031591680, "file_name": "cache/pos_embedprojConv_286.const", "file_size": 10764288 }, "onnx::MatMul_12289": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1042355968, "file_name": "cache/pos_embedprojConv_287.const", "file_size": 10764288 }, "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1053120256, "file_name": "cache/pos_embedprojConv_288.const", "file_size": 2691072 }, "/transformer_blocks.9/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1055811328, "file_name": "cache/pos_embedprojConv_289.const", "file_size": 3072 }, "/transformer_blocks.9/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1055814400, "file_name": "cache/pos_embedprojConv_290.const", "file_size": 3072 }, "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_8_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1055817472, "file_name": "cache/pos_embedprojConv_291.const", "file_size": 2691072 }, "/transformer_blocks.9/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1058508544, "file_name": "cache/pos_embedprojConv_292.const", "file_size": 3072 }, "/transformer_blocks.9/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1058511616, "file_name": "cache/pos_embedprojConv_293.const", "file_size": 3072 }, "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_3_existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1058514688, "file_name": "cache/pos_embedprojConv_294.const", "file_size": 5382144 }, "onnx::MatMul_12318": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 1063896832, "file_name": "cache/pos_embedprojConv_295.const", "file_size": 2691200 }, "onnx::MatMul_12317": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 1066588032, "file_name": "cache/pos_embedprojConv_296.const", "file_size": 2691200 }, "onnx::MatMul_12319": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1069279232, "file_name": "cache/pos_embedprojConv_297.const", "file_size": 2691072 }, "onnx::MatMul_12327": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1071970304, "file_name": "cache/pos_embedprojConv_298.const", "file_size": 2691072 }, "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_0_existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1074661376, "file_name": "cache/pos_embedprojConv_299.const", "file_size": 5382144 }, "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_0_existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1080043520, "file_name": "cache/pos_embedprojConv_300.const", "file_size": 5382144 }, "onnx::MatMul_12305_onnx::MatMul_12290": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 1085425664, "file_name": "cache/pos_embedprojConv_301.const", "file_size": 5382400 }, "onnx::MatMul_12306_onnx::MatMul_12291": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 1090808064, "file_name": "cache/pos_embedprojConv_302.const", "file_size": 5382400 }, "onnx::MatMul_12307_onnx::MatMul_12292": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1096190464, "file_name": "cache/pos_embedprojConv_303.const", "file_size": 5382144 }, "onnx::MatMul_12315": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1101572608, "file_name": "cache/pos_embedprojConv_304.const", "file_size": 2691072 }, "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1104263680, "file_name": "cache/pos_embedprojConv_305.const", "file_size": 2691072 }, "onnx::MatMul_12316": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1106954752, "file_name": "cache/pos_embedprojConv_306.const", "file_size": 2691072 }, "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1109645824, "file_name": "cache/pos_embedprojConv_307.const", "file_size": 2691072 }, "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1112336896, "file_name": "cache/pos_embedprojConv_308.const", "file_size": 2691072 }, "/transformer_blocks.9/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1115027968, "file_name": "cache/pos_embedprojConv_309.const", "file_size": 3072 }, "/transformer_blocks.9/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1115031040, "file_name": "cache/pos_embedprojConv_310.const", "file_size": 3072 }, "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_6_existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_7": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1115034112, "file_name": "cache/pos_embedprojConv_311.const", "file_size": 5382144 }, "onnx::MatMul_12328": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1120416256, "file_name": "cache/pos_embedprojConv_312.const", "file_size": 10764288 }, "onnx::MatMul_12329": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1131180544, "file_name": "cache/pos_embedprojConv_313.const", "file_size": 10764288 }, "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_8_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1141944832, "file_name": "cache/pos_embedprojConv_314.const", "file_size": 2691072 }, "/transformer_blocks.10/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1144635904, "file_name": "cache/pos_embedprojConv_315.const", "file_size": 3072 }, "/transformer_blocks.10/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1144638976, "file_name": "cache/pos_embedprojConv_316.const", "file_size": 3072 }, "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_0_existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1144642048, "file_name": "cache/pos_embedprojConv_317.const", "file_size": 5382144 }, "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_3_existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1150024192, "file_name": "cache/pos_embedprojConv_318.const", "file_size": 5382144 }, "/transformer_blocks.9/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1155406336, "file_name": "cache/pos_embedprojConv_319.const", "file_size": 3072 }, "/transformer_blocks.9/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1155409408, "file_name": "cache/pos_embedprojConv_320.const", "file_size": 3072 }, "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_3_existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1155412480, "file_name": "cache/pos_embedprojConv_321.const", "file_size": 5382144 }, "onnx::MatMul_12330": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1160794624, "file_name": "cache/pos_embedprojConv_322.const", "file_size": 10764288 }, "onnx::MatMul_12331": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1171558912, "file_name": "cache/pos_embedprojConv_323.const", "file_size": 10764288 }, "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1182323200, "file_name": "cache/pos_embedprojConv_324.const", "file_size": 2691072 }, "/transformer_blocks.10/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1185014272, "file_name": "cache/pos_embedprojConv_325.const", "file_size": 3072 }, "/transformer_blocks.10/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1185017344, "file_name": "cache/pos_embedprojConv_326.const", "file_size": 3072 }, "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_0_existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1185020416, "file_name": "cache/pos_embedprojConv_327.const", "file_size": 5382144 }, "onnx::MatMul_12347_onnx::MatMul_12332": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 1190402560, "file_name": "cache/pos_embedprojConv_328.const", "file_size": 5382400 }, "onnx::MatMul_12348_onnx::MatMul_12333": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 1195784960, "file_name": "cache/pos_embedprojConv_329.const", "file_size": 5382400 }, "onnx::MatMul_12349_onnx::MatMul_12334": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1201167360, "file_name": "cache/pos_embedprojConv_330.const", "file_size": 5382144 }, "onnx::MatMul_12358": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1206549504, "file_name": "cache/pos_embedprojConv_331.const", "file_size": 2691072 }, "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1209240576, "file_name": "cache/pos_embedprojConv_332.const", "file_size": 2691072 }, "onnx::MatMul_12357": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1211931648, "file_name": "cache/pos_embedprojConv_333.const", "file_size": 2691072 }, "onnx::MatMul_12360": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 1214622720, "file_name": "cache/pos_embedprojConv_334.const", "file_size": 2691200 }, "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1217313920, "file_name": "cache/pos_embedprojConv_335.const", "file_size": 2691072 }, "/transformer_blocks.10/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1220004992, "file_name": "cache/pos_embedprojConv_336.const", "file_size": 3072 }, "/transformer_blocks.10/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1220008064, "file_name": "cache/pos_embedprojConv_337.const", "file_size": 3072 }, "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_3_existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1220011136, "file_name": "cache/pos_embedprojConv_338.const", "file_size": 5382144 }, "onnx::MatMul_12372": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1225393280, "file_name": "cache/pos_embedprojConv_339.const", "file_size": 10764288 }, "onnx::MatMul_12373": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1236157568, "file_name": "cache/pos_embedprojConv_340.const", "file_size": 10764288 }, "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1246921856, "file_name": "cache/pos_embedprojConv_341.const", "file_size": 2691072 }, "/transformer_blocks.11/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1249612928, "file_name": "cache/pos_embedprojConv_342.const", "file_size": 3072 }, "/transformer_blocks.11/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1249616000, "file_name": "cache/pos_embedprojConv_343.const", "file_size": 3072 }, "onnx::MatMul_12359": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 1249619072, "file_name": "cache/pos_embedprojConv_344.const", "file_size": 2691200 }, "onnx::MatMul_12361": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1252310272, "file_name": "cache/pos_embedprojConv_345.const", "file_size": 2691072 }, "onnx::MatMul_12369": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1255001344, "file_name": "cache/pos_embedprojConv_346.const", "file_size": 2691072 }, "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1257692416, "file_name": "cache/pos_embedprojConv_347.const", "file_size": 2691072 }, "/transformer_blocks.10/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1260383488, "file_name": "cache/pos_embedprojConv_348.const", "file_size": 3072 }, "/transformer_blocks.10/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1260386560, "file_name": "cache/pos_embedprojConv_349.const", "file_size": 3072 }, "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_6_existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_7": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1260389632, "file_name": "cache/pos_embedprojConv_350.const", "file_size": 5382144 }, "onnx::MatMul_12370": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1265771776, "file_name": "cache/pos_embedprojConv_351.const", "file_size": 10764288 }, "onnx::MatMul_12371": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1276536064, "file_name": "cache/pos_embedprojConv_352.const", "file_size": 10764288 }, "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_8_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1287300352, "file_name": "cache/pos_embedprojConv_353.const", "file_size": 2691072 }, "/transformer_blocks.11/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1289991424, "file_name": "cache/pos_embedprojConv_354.const", "file_size": 3072 }, "/transformer_blocks.11/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1289994496, "file_name": "cache/pos_embedprojConv_355.const", "file_size": 3072 }, "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_3_existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1289997568, "file_name": "cache/pos_embedprojConv_356.const", "file_size": 5382144 }, "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_0_existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1295379712, "file_name": "cache/pos_embedprojConv_357.const", "file_size": 5382144 }, "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_0_existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1300761856, "file_name": "cache/pos_embedprojConv_358.const", "file_size": 5382144 }, "onnx::MatMul_12389_onnx::MatMul_12374": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 1306144000, "file_name": "cache/pos_embedprojConv_359.const", "file_size": 5382400 }, "onnx::MatMul_12390_onnx::MatMul_12375": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 1311526400, "file_name": "cache/pos_embedprojConv_360.const", "file_size": 5382400 }, "onnx::MatMul_12391_onnx::MatMul_12376": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1316908800, "file_name": "cache/pos_embedprojConv_361.const", "file_size": 5382144 }, "onnx::MatMul_12400": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1322290944, "file_name": "cache/pos_embedprojConv_362.const", "file_size": 2691072 }, "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1324982016, "file_name": "cache/pos_embedprojConv_363.const", "file_size": 2691072 }, "onnx::MatMul_12399": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1327673088, "file_name": "cache/pos_embedprojConv_364.const", "file_size": 2691072 }, "onnx::MatMul_12402": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 1330364160, "file_name": "cache/pos_embedprojConv_365.const", "file_size": 2691200 }, "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1333055360, "file_name": "cache/pos_embedprojConv_366.const", "file_size": 2691072 }, "/transformer_blocks.11/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1335746432, "file_name": "cache/pos_embedprojConv_367.const", "file_size": 3072 }, "/transformer_blocks.11/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1335749504, "file_name": "cache/pos_embedprojConv_368.const", "file_size": 3072 }, "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_3_existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1335752576, "file_name": "cache/pos_embedprojConv_369.const", "file_size": 5382144 }, "onnx::MatMul_12414": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1341134720, "file_name": "cache/pos_embedprojConv_370.const", "file_size": 10764288 }, "onnx::MatMul_12415": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1351899008, "file_name": "cache/pos_embedprojConv_371.const", "file_size": 10764288 }, "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1362663296, "file_name": "cache/pos_embedprojConv_372.const", "file_size": 2691072 }, "/transformer_blocks.12/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1365354368, "file_name": "cache/pos_embedprojConv_373.const", "file_size": 3072 }, "/transformer_blocks.12/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1365357440, "file_name": "cache/pos_embedprojConv_374.const", "file_size": 3072 }, "onnx::MatMul_12401": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 1365360512, "file_name": "cache/pos_embedprojConv_375.const", "file_size": 2691200 }, "onnx::MatMul_12403": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1368051712, "file_name": "cache/pos_embedprojConv_376.const", "file_size": 2691072 }, "onnx::MatMul_12411": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1370742784, "file_name": "cache/pos_embedprojConv_377.const", "file_size": 2691072 }, "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1373433856, "file_name": "cache/pos_embedprojConv_378.const", "file_size": 2691072 }, "/transformer_blocks.11/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1376124928, "file_name": "cache/pos_embedprojConv_379.const", "file_size": 3072 }, "/transformer_blocks.11/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1376128000, "file_name": "cache/pos_embedprojConv_380.const", "file_size": 3072 }, "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_6_existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_7": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1376131072, "file_name": "cache/pos_embedprojConv_381.const", "file_size": 5382144 }, "onnx::MatMul_12412": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1381513216, "file_name": "cache/pos_embedprojConv_382.const", "file_size": 10764288 }, "onnx::MatMul_12413": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1392277504, "file_name": "cache/pos_embedprojConv_383.const", "file_size": 10764288 }, "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_8_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1403041792, "file_name": "cache/pos_embedprojConv_384.const", "file_size": 2691072 }, "/transformer_blocks.12/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1405732864, "file_name": "cache/pos_embedprojConv_385.const", "file_size": 3072 }, "/transformer_blocks.12/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1405735936, "file_name": "cache/pos_embedprojConv_386.const", "file_size": 3072 }, "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_3_existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1405739008, "file_name": "cache/pos_embedprojConv_387.const", "file_size": 5382144 }, "onnx::MatMul_12444": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 1411121152, "file_name": "cache/pos_embedprojConv_388.const", "file_size": 2691200 }, "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_0_existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1413812352, "file_name": "cache/pos_embedprojConv_389.const", "file_size": 5382144 }, "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_0_existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1419194496, "file_name": "cache/pos_embedprojConv_390.const", "file_size": 5382144 }, "onnx::MatMul_12431_onnx::MatMul_12416": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 1424576640, "file_name": "cache/pos_embedprojConv_391.const", "file_size": 5382400 }, "onnx::MatMul_12432_onnx::MatMul_12417": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 1429959040, "file_name": "cache/pos_embedprojConv_392.const", "file_size": 5382400 }, "onnx::MatMul_12433_onnx::MatMul_12418": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1435341440, "file_name": "cache/pos_embedprojConv_393.const", "file_size": 5382144 }, "onnx::MatMul_12442": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1440723584, "file_name": "cache/pos_embedprojConv_394.const", "file_size": 2691072 }, "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1443414656, "file_name": "cache/pos_embedprojConv_395.const", "file_size": 2691072 }, "onnx::MatMul_12441": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1446105728, "file_name": "cache/pos_embedprojConv_396.const", "file_size": 2691072 }, "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1448796800, "file_name": "cache/pos_embedprojConv_397.const", "file_size": 2691072 }, "/transformer_blocks.12/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1451487872, "file_name": "cache/pos_embedprojConv_398.const", "file_size": 3072 }, "/transformer_blocks.12/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1451490944, "file_name": "cache/pos_embedprojConv_399.const", "file_size": 3072 }, "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_3_existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1451494016, "file_name": "cache/pos_embedprojConv_400.const", "file_size": 5382144 }, "onnx::MatMul_12456": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1456876160, "file_name": "cache/pos_embedprojConv_401.const", "file_size": 10764288 }, "onnx::MatMul_12457": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1467640448, "file_name": "cache/pos_embedprojConv_402.const", "file_size": 10764288 }, "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1478404736, "file_name": "cache/pos_embedprojConv_403.const", "file_size": 2691072 }, "/transformer_blocks.13/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1481095808, "file_name": "cache/pos_embedprojConv_404.const", "file_size": 3072 }, "/transformer_blocks.13/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1481098880, "file_name": "cache/pos_embedprojConv_405.const", "file_size": 3072 }, "onnx::MatMul_12443": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691200 ], "size_in_bytes": 2691200, "op_tensor_size": 2691200, "offset": 1481101952, "file_name": "cache/pos_embedprojConv_406.const", "file_size": 2691200 }, "onnx::MatMul_12445": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1483793152, "file_name": "cache/pos_embedprojConv_407.const", "file_size": 2691072 }, "onnx::MatMul_12453": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1486484224, "file_name": "cache/pos_embedprojConv_408.const", "file_size": 2691072 }, "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1489175296, "file_name": "cache/pos_embedprojConv_409.const", "file_size": 2691072 }, "/transformer_blocks.12/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1491866368, "file_name": "cache/pos_embedprojConv_410.const", "file_size": 3072 }, "/transformer_blocks.12/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1491869440, "file_name": "cache/pos_embedprojConv_411.const", "file_size": 3072 }, "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_6_existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_7": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1491872512, "file_name": "cache/pos_embedprojConv_412.const", "file_size": 5382144 }, "onnx::MatMul_12454": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1497254656, "file_name": "cache/pos_embedprojConv_413.const", "file_size": 10764288 }, "onnx::MatMul_12455": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1508018944, "file_name": "cache/pos_embedprojConv_414.const", "file_size": 10764288 }, "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_8_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1518783232, "file_name": "cache/pos_embedprojConv_415.const", "file_size": 2691072 }, "/transformer_blocks.13/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1521474304, "file_name": "cache/pos_embedprojConv_416.const", "file_size": 3072 }, "/transformer_blocks.13/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1521477376, "file_name": "cache/pos_embedprojConv_417.const", "file_size": 3072 }, "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_0_existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1521480448, "file_name": "cache/pos_embedprojConv_418.const", "file_size": 5382144 }, "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_0_existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1526862592, "file_name": "cache/pos_embedprojConv_419.const", "file_size": 5382144 }, "onnx::MatMul_12473_onnx::MatMul_12458": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 1532244736, "file_name": "cache/pos_embedprojConv_420.const", "file_size": 5382400 }, "onnx::MatMul_12474_onnx::MatMul_12459": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 1537627136, "file_name": "cache/pos_embedprojConv_421.const", "file_size": 5382400 }, "onnx::MatMul_12475_onnx::MatMul_12460": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1543009536, "file_name": "cache/pos_embedprojConv_422.const", "file_size": 5382144 }, "onnx::MatMul_12484": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1548391680, "file_name": "cache/pos_embedprojConv_423.const", "file_size": 2691072 }, "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1551082752, "file_name": "cache/pos_embedprojConv_424.const", "file_size": 2691072 }, "/transformer_blocks.13/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1553773824, "file_name": "cache/pos_embedprojConv_425.const", "file_size": 3072 }, "/transformer_blocks.13/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1553776896, "file_name": "cache/pos_embedprojConv_426.const", "file_size": 3072 }, "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_3_existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1553779968, "file_name": "cache/pos_embedprojConv_427.const", "file_size": 5382144 }, "onnx::MatMul_12485": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1559162112, "file_name": "cache/pos_embedprojConv_428.const", "file_size": 10764288 }, "onnx::MatMul_12486": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1569926400, "file_name": "cache/pos_embedprojConv_429.const", "file_size": 10764288 }, "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1580690688, "file_name": "cache/pos_embedprojConv_430.const", "file_size": 2691072 }, "onnx::MatMul_12483": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1583381760, "file_name": "cache/pos_embedprojConv_431.const", "file_size": 2691072 }, "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1586072832, "file_name": "cache/pos_embedprojConv_432.const", "file_size": 2691072 }, "/transformer_blocks.13/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1588763904, "file_name": "cache/pos_embedprojConv_433.const", "file_size": 3072 }, "/transformer_blocks.13/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1588766976, "file_name": "cache/pos_embedprojConv_434.const", "file_size": 3072 }, "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_3_existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1588770048, "file_name": "cache/pos_embedprojConv_435.const", "file_size": 5382144 }, "onnx::MatMul_12487": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1594152192, "file_name": "cache/pos_embedprojConv_436.const", "file_size": 10764288 }, "onnx::MatMul_12488": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1604916480, "file_name": "cache/pos_embedprojConv_437.const", "file_size": 10764288 }, "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1615680768, "file_name": "cache/pos_embedprojConv_438.const", "file_size": 2691072 }, "/transformer_blocks.14/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1618371840, "file_name": "cache/pos_embedprojConv_439.const", "file_size": 3072 }, "/transformer_blocks.14/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1618374912, "file_name": "cache/pos_embedprojConv_440.const", "file_size": 3072 }, "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_0_existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1618377984, "file_name": "cache/pos_embedprojConv_441.const", "file_size": 5382144 }, "/transformer_blocks.14/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1623760128, "file_name": "cache/pos_embedprojConv_442.const", "file_size": 3072 }, "/transformer_blocks.14/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1623763200, "file_name": "cache/pos_embedprojConv_443.const", "file_size": 3072 }, "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_0_existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1623766272, "file_name": "cache/pos_embedprojConv_444.const", "file_size": 5382144 }, "onnx::MatMul_12504_onnx::MatMul_12489": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 1629148416, "file_name": "cache/pos_embedprojConv_445.const", "file_size": 5382400 }, "onnx::MatMul_12505_onnx::MatMul_12490": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 1634530816, "file_name": "cache/pos_embedprojConv_446.const", "file_size": 5382400 }, "onnx::MatMul_12506_onnx::MatMul_12491": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1639913216, "file_name": "cache/pos_embedprojConv_447.const", "file_size": 5382144 }, "onnx::MatMul_12515": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1645295360, "file_name": "cache/pos_embedprojConv_448.const", "file_size": 2691072 }, "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1647986432, "file_name": "cache/pos_embedprojConv_449.const", "file_size": 2691072 }, "/transformer_blocks.14/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1650677504, "file_name": "cache/pos_embedprojConv_450.const", "file_size": 3072 }, "/transformer_blocks.14/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1650680576, "file_name": "cache/pos_embedprojConv_451.const", "file_size": 3072 }, "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_3_existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1650683648, "file_name": "cache/pos_embedprojConv_452.const", "file_size": 5382144 }, "onnx::MatMul_12516": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1656065792, "file_name": "cache/pos_embedprojConv_453.const", "file_size": 10764288 }, "onnx::MatMul_12517": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1666830080, "file_name": "cache/pos_embedprojConv_454.const", "file_size": 10764288 }, "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1677594368, "file_name": "cache/pos_embedprojConv_455.const", "file_size": 2691072 }, "onnx::MatMul_12514": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1680285440, "file_name": "cache/pos_embedprojConv_456.const", "file_size": 2691072 }, "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1682976512, "file_name": "cache/pos_embedprojConv_457.const", "file_size": 2691072 }, "/transformer_blocks.14/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1685667584, "file_name": "cache/pos_embedprojConv_458.const", "file_size": 3072 }, "/transformer_blocks.14/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1685670656, "file_name": "cache/pos_embedprojConv_459.const", "file_size": 3072 }, "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_3_existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1685673728, "file_name": "cache/pos_embedprojConv_460.const", "file_size": 5382144 }, "onnx::MatMul_12518": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1691055872, "file_name": "cache/pos_embedprojConv_461.const", "file_size": 10764288 }, "onnx::MatMul_12519": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1701820160, "file_name": "cache/pos_embedprojConv_462.const", "file_size": 10764288 }, "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1712584448, "file_name": "cache/pos_embedprojConv_463.const", "file_size": 2691072 }, "/transformer_blocks.15/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1715275520, "file_name": "cache/pos_embedprojConv_464.const", "file_size": 3072 }, "/transformer_blocks.15/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1715278592, "file_name": "cache/pos_embedprojConv_465.const", "file_size": 3072 }, "/transformer_blocks.15/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1715281664, "file_name": "cache/pos_embedprojConv_466.const", "file_size": 3072 }, "/transformer_blocks.15/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1715284736, "file_name": "cache/pos_embedprojConv_467.const", "file_size": 3072 }, "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_0_existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1715287808, "file_name": "cache/pos_embedprojConv_468.const", "file_size": 5382144 }, "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_0_existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1720669952, "file_name": "cache/pos_embedprojConv_469.const", "file_size": 5382144 }, "onnx::MatMul_12535_onnx::MatMul_12520": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 1726052096, "file_name": "cache/pos_embedprojConv_470.const", "file_size": 5382400 }, "onnx::MatMul_12536_onnx::MatMul_12521": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 1731434496, "file_name": "cache/pos_embedprojConv_471.const", "file_size": 5382400 }, "onnx::MatMul_12537_onnx::MatMul_12522": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1736816896, "file_name": "cache/pos_embedprojConv_472.const", "file_size": 5382144 }, "onnx::MatMul_12546": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1742199040, "file_name": "cache/pos_embedprojConv_473.const", "file_size": 2691072 }, "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1744890112, "file_name": "cache/pos_embedprojConv_474.const", "file_size": 2691072 }, "/transformer_blocks.15/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1747581184, "file_name": "cache/pos_embedprojConv_475.const", "file_size": 3072 }, "/transformer_blocks.15/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1747584256, "file_name": "cache/pos_embedprojConv_476.const", "file_size": 3072 }, "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_3_existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1747587328, "file_name": "cache/pos_embedprojConv_477.const", "file_size": 5382144 }, "onnx::MatMul_12547": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1752969472, "file_name": "cache/pos_embedprojConv_478.const", "file_size": 10764288 }, "onnx::MatMul_12548": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1763733760, "file_name": "cache/pos_embedprojConv_479.const", "file_size": 10764288 }, "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1774498048, "file_name": "cache/pos_embedprojConv_480.const", "file_size": 2691072 }, "onnx::MatMul_12545": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1777189120, "file_name": "cache/pos_embedprojConv_481.const", "file_size": 2691072 }, "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1779880192, "file_name": "cache/pos_embedprojConv_482.const", "file_size": 2691072 }, "/transformer_blocks.15/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1782571264, "file_name": "cache/pos_embedprojConv_483.const", "file_size": 3072 }, "/transformer_blocks.15/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1782574336, "file_name": "cache/pos_embedprojConv_484.const", "file_size": 3072 }, "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_3_existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1782577408, "file_name": "cache/pos_embedprojConv_485.const", "file_size": 5382144 }, "onnx::MatMul_12549": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1787959552, "file_name": "cache/pos_embedprojConv_486.const", "file_size": 10764288 }, "onnx::MatMul_12550": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1798723840, "file_name": "cache/pos_embedprojConv_487.const", "file_size": 10764288 }, "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1809488128, "file_name": "cache/pos_embedprojConv_488.const", "file_size": 2691072 }, "/transformer_blocks.16/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1812179200, "file_name": "cache/pos_embedprojConv_489.const", "file_size": 3072 }, "/transformer_blocks.16/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1812182272, "file_name": "cache/pos_embedprojConv_490.const", "file_size": 3072 }, "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_0_existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1812185344, "file_name": "cache/pos_embedprojConv_491.const", "file_size": 5382144 }, "/transformer_blocks.16/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1817567488, "file_name": "cache/pos_embedprojConv_492.const", "file_size": 3072 }, "/transformer_blocks.16/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1817570560, "file_name": "cache/pos_embedprojConv_493.const", "file_size": 3072 }, "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_0_existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1817573632, "file_name": "cache/pos_embedprojConv_494.const", "file_size": 5382144 }, "onnx::MatMul_12566_onnx::MatMul_12551": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 1822955776, "file_name": "cache/pos_embedprojConv_495.const", "file_size": 5382400 }, "onnx::MatMul_12567_onnx::MatMul_12552": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 1828338176, "file_name": "cache/pos_embedprojConv_496.const", "file_size": 5382400 }, "onnx::MatMul_12568_onnx::MatMul_12553": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1833720576, "file_name": "cache/pos_embedprojConv_497.const", "file_size": 5382144 }, "onnx::MatMul_12577": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1839102720, "file_name": "cache/pos_embedprojConv_498.const", "file_size": 2691072 }, "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1841793792, "file_name": "cache/pos_embedprojConv_499.const", "file_size": 2691072 }, "/transformer_blocks.16/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1844484864, "file_name": "cache/pos_embedprojConv_500.const", "file_size": 3072 }, "/transformer_blocks.16/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1844487936, "file_name": "cache/pos_embedprojConv_501.const", "file_size": 3072 }, "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_3_existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1844491008, "file_name": "cache/pos_embedprojConv_502.const", "file_size": 5382144 }, "onnx::MatMul_12578": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1849873152, "file_name": "cache/pos_embedprojConv_503.const", "file_size": 10764288 }, "onnx::MatMul_12579": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1860637440, "file_name": "cache/pos_embedprojConv_504.const", "file_size": 10764288 }, "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1871401728, "file_name": "cache/pos_embedprojConv_505.const", "file_size": 2691072 }, "onnx::MatMul_12576": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1874092800, "file_name": "cache/pos_embedprojConv_506.const", "file_size": 2691072 }, "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1876783872, "file_name": "cache/pos_embedprojConv_507.const", "file_size": 2691072 }, "/transformer_blocks.16/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1879474944, "file_name": "cache/pos_embedprojConv_508.const", "file_size": 3072 }, "/transformer_blocks.16/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1879478016, "file_name": "cache/pos_embedprojConv_509.const", "file_size": 3072 }, "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_3_existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1879481088, "file_name": "cache/pos_embedprojConv_510.const", "file_size": 5382144 }, "onnx::MatMul_12580": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1884863232, "file_name": "cache/pos_embedprojConv_511.const", "file_size": 10764288 }, "onnx::MatMul_12581": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1895627520, "file_name": "cache/pos_embedprojConv_512.const", "file_size": 10764288 }, "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1906391808, "file_name": "cache/pos_embedprojConv_513.const", "file_size": 2691072 }, "/transformer_blocks.17/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1909082880, "file_name": "cache/pos_embedprojConv_514.const", "file_size": 3072 }, "/transformer_blocks.17/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1909085952, "file_name": "cache/pos_embedprojConv_515.const", "file_size": 3072 }, "/transformer_blocks.17/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1909089024, "file_name": "cache/pos_embedprojConv_516.const", "file_size": 3072 }, "/transformer_blocks.17/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1909092096, "file_name": "cache/pos_embedprojConv_517.const", "file_size": 3072 }, "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_0_existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1909095168, "file_name": "cache/pos_embedprojConv_518.const", "file_size": 5382144 }, "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_0_existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1914477312, "file_name": "cache/pos_embedprojConv_519.const", "file_size": 5382144 }, "onnx::MatMul_12597_onnx::MatMul_12582": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 1919859456, "file_name": "cache/pos_embedprojConv_520.const", "file_size": 5382400 }, "onnx::MatMul_12598_onnx::MatMul_12583": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 1925241856, "file_name": "cache/pos_embedprojConv_521.const", "file_size": 5382400 }, "onnx::MatMul_12599_onnx::MatMul_12584": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1930624256, "file_name": "cache/pos_embedprojConv_522.const", "file_size": 5382144 }, "onnx::MatMul_12608": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1936006400, "file_name": "cache/pos_embedprojConv_523.const", "file_size": 2691072 }, "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1938697472, "file_name": "cache/pos_embedprojConv_524.const", "file_size": 2691072 }, "/transformer_blocks.17/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1941388544, "file_name": "cache/pos_embedprojConv_525.const", "file_size": 3072 }, "/transformer_blocks.17/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1941391616, "file_name": "cache/pos_embedprojConv_526.const", "file_size": 3072 }, "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_3_existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1941394688, "file_name": "cache/pos_embedprojConv_527.const", "file_size": 5382144 }, "onnx::MatMul_12609": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1946776832, "file_name": "cache/pos_embedprojConv_528.const", "file_size": 10764288 }, "onnx::MatMul_12610": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1957541120, "file_name": "cache/pos_embedprojConv_529.const", "file_size": 10764288 }, "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1968305408, "file_name": "cache/pos_embedprojConv_530.const", "file_size": 2691072 }, "onnx::MatMul_12607": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1970996480, "file_name": "cache/pos_embedprojConv_531.const", "file_size": 2691072 }, "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1973687552, "file_name": "cache/pos_embedprojConv_532.const", "file_size": 2691072 }, "/transformer_blocks.17/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1976378624, "file_name": "cache/pos_embedprojConv_533.const", "file_size": 3072 }, "/transformer_blocks.17/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1976381696, "file_name": "cache/pos_embedprojConv_534.const", "file_size": 3072 }, "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_3_existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1976384768, "file_name": "cache/pos_embedprojConv_535.const", "file_size": 5382144 }, "onnx::MatMul_12611": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1981766912, "file_name": "cache/pos_embedprojConv_536.const", "file_size": 10764288 }, "onnx::MatMul_12612": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1992531200, "file_name": "cache/pos_embedprojConv_537.const", "file_size": 10764288 }, "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2003295488, "file_name": "cache/pos_embedprojConv_538.const", "file_size": 2691072 }, "/transformer_blocks.18/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2005986560, "file_name": "cache/pos_embedprojConv_539.const", "file_size": 3072 }, "/transformer_blocks.18/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2005989632, "file_name": "cache/pos_embedprojConv_540.const", "file_size": 3072 }, "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_0_existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2005992704, "file_name": "cache/pos_embedprojConv_541.const", "file_size": 5382144 }, "/transformer_blocks.18/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2011374848, "file_name": "cache/pos_embedprojConv_542.const", "file_size": 3072 }, "/transformer_blocks.18/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2011377920, "file_name": "cache/pos_embedprojConv_543.const", "file_size": 3072 }, "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_0_existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2011380992, "file_name": "cache/pos_embedprojConv_544.const", "file_size": 5382144 }, "onnx::MatMul_12628_onnx::MatMul_12613": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 2016763136, "file_name": "cache/pos_embedprojConv_545.const", "file_size": 5382400 }, "onnx::MatMul_12629_onnx::MatMul_12614": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 2022145536, "file_name": "cache/pos_embedprojConv_546.const", "file_size": 5382400 }, "onnx::MatMul_12630_onnx::MatMul_12615": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2027527936, "file_name": "cache/pos_embedprojConv_547.const", "file_size": 5382144 }, "onnx::MatMul_12639": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2032910080, "file_name": "cache/pos_embedprojConv_548.const", "file_size": 2691072 }, "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2035601152, "file_name": "cache/pos_embedprojConv_549.const", "file_size": 2691072 }, "/transformer_blocks.18/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2038292224, "file_name": "cache/pos_embedprojConv_550.const", "file_size": 3072 }, "/transformer_blocks.18/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2038295296, "file_name": "cache/pos_embedprojConv_551.const", "file_size": 3072 }, "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_3_existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2038298368, "file_name": "cache/pos_embedprojConv_552.const", "file_size": 5382144 }, "onnx::MatMul_12640": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2043680512, "file_name": "cache/pos_embedprojConv_553.const", "file_size": 10764288 }, "onnx::MatMul_12641": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2054444800, "file_name": "cache/pos_embedprojConv_554.const", "file_size": 10764288 }, "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2065209088, "file_name": "cache/pos_embedprojConv_555.const", "file_size": 2691072 }, "onnx::MatMul_12638": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2067900160, "file_name": "cache/pos_embedprojConv_556.const", "file_size": 2691072 }, "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2070591232, "file_name": "cache/pos_embedprojConv_557.const", "file_size": 2691072 }, "/transformer_blocks.18/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2073282304, "file_name": "cache/pos_embedprojConv_558.const", "file_size": 3072 }, "/transformer_blocks.18/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2073285376, "file_name": "cache/pos_embedprojConv_559.const", "file_size": 3072 }, "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_3_existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2073288448, "file_name": "cache/pos_embedprojConv_560.const", "file_size": 5382144 }, "onnx::MatMul_12642": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2078670592, "file_name": "cache/pos_embedprojConv_561.const", "file_size": 10764288 }, "onnx::MatMul_12643": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2089434880, "file_name": "cache/pos_embedprojConv_562.const", "file_size": 10764288 }, "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2100199168, "file_name": "cache/pos_embedprojConv_563.const", "file_size": 2691072 }, "/transformer_blocks.19/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2102890240, "file_name": "cache/pos_embedprojConv_564.const", "file_size": 3072 }, "/transformer_blocks.19/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2102893312, "file_name": "cache/pos_embedprojConv_565.const", "file_size": 3072 }, "/transformer_blocks.19/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2102896384, "file_name": "cache/pos_embedprojConv_566.const", "file_size": 3072 }, "/transformer_blocks.19/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2102899456, "file_name": "cache/pos_embedprojConv_567.const", "file_size": 3072 }, "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_0_existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2102902528, "file_name": "cache/pos_embedprojConv_568.const", "file_size": 5382144 }, "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_0_existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2108284672, "file_name": "cache/pos_embedprojConv_569.const", "file_size": 5382144 }, "onnx::MatMul_12659_onnx::MatMul_12644": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 2113666816, "file_name": "cache/pos_embedprojConv_570.const", "file_size": 5382400 }, "onnx::MatMul_12660_onnx::MatMul_12645": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 2119049216, "file_name": "cache/pos_embedprojConv_571.const", "file_size": 5382400 }, "onnx::MatMul_12661_onnx::MatMul_12646": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2124431616, "file_name": "cache/pos_embedprojConv_572.const", "file_size": 5382144 }, "onnx::MatMul_12670": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2129813760, "file_name": "cache/pos_embedprojConv_573.const", "file_size": 2691072 }, "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2132504832, "file_name": "cache/pos_embedprojConv_574.const", "file_size": 2691072 }, "/transformer_blocks.19/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2135195904, "file_name": "cache/pos_embedprojConv_575.const", "file_size": 3072 }, "/transformer_blocks.19/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2135198976, "file_name": "cache/pos_embedprojConv_576.const", "file_size": 3072 }, "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_3_existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2135202048, "file_name": "cache/pos_embedprojConv_577.const", "file_size": 5382144 }, "onnx::MatMul_12671": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2140584192, "file_name": "cache/pos_embedprojConv_578.const", "file_size": 10764288 }, "onnx::MatMul_12672": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2151348480, "file_name": "cache/pos_embedprojConv_579.const", "file_size": 10764288 }, "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2162112768, "file_name": "cache/pos_embedprojConv_580.const", "file_size": 2691072 }, "onnx::MatMul_12669": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2164803840, "file_name": "cache/pos_embedprojConv_581.const", "file_size": 2691072 }, "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2167494912, "file_name": "cache/pos_embedprojConv_582.const", "file_size": 2691072 }, "/transformer_blocks.19/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2170185984, "file_name": "cache/pos_embedprojConv_583.const", "file_size": 3072 }, "/transformer_blocks.19/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2170189056, "file_name": "cache/pos_embedprojConv_584.const", "file_size": 3072 }, "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_3_existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2170192128, "file_name": "cache/pos_embedprojConv_585.const", "file_size": 5382144 }, "onnx::MatMul_12673": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2175574272, "file_name": "cache/pos_embedprojConv_586.const", "file_size": 10764288 }, "onnx::MatMul_12674": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2186338560, "file_name": "cache/pos_embedprojConv_587.const", "file_size": 10764288 }, "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2197102848, "file_name": "cache/pos_embedprojConv_588.const", "file_size": 2691072 }, "/transformer_blocks.20/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2199793920, "file_name": "cache/pos_embedprojConv_589.const", "file_size": 3072 }, "/transformer_blocks.20/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2199796992, "file_name": "cache/pos_embedprojConv_590.const", "file_size": 3072 }, "/transformer_blocks.20/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2199800064, "file_name": "cache/pos_embedprojConv_591.const", "file_size": 3072 }, "/transformer_blocks.20/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2199803136, "file_name": "cache/pos_embedprojConv_592.const", "file_size": 3072 }, "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_0_existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2199806208, "file_name": "cache/pos_embedprojConv_593.const", "file_size": 5382144 }, "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_0_existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2205188352, "file_name": "cache/pos_embedprojConv_594.const", "file_size": 5382144 }, "onnx::MatMul_12690_onnx::MatMul_12675": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 2210570496, "file_name": "cache/pos_embedprojConv_595.const", "file_size": 5382400 }, "onnx::MatMul_12691_onnx::MatMul_12676": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 2215952896, "file_name": "cache/pos_embedprojConv_596.const", "file_size": 5382400 }, "onnx::MatMul_12692_onnx::MatMul_12677": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2221335296, "file_name": "cache/pos_embedprojConv_597.const", "file_size": 5382144 }, "onnx::MatMul_12700": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2226717440, "file_name": "cache/pos_embedprojConv_598.const", "file_size": 2691072 }, "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2229408512, "file_name": "cache/pos_embedprojConv_599.const", "file_size": 2691072 }, "onnx::MatMul_12701": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2232099584, "file_name": "cache/pos_embedprojConv_600.const", "file_size": 2691072 }, "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2234790656, "file_name": "cache/pos_embedprojConv_601.const", "file_size": 2691072 }, "/transformer_blocks.20/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2237481728, "file_name": "cache/pos_embedprojConv_602.const", "file_size": 3072 }, "/transformer_blocks.20/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2237484800, "file_name": "cache/pos_embedprojConv_603.const", "file_size": 3072 }, "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_3_existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2237487872, "file_name": "cache/pos_embedprojConv_604.const", "file_size": 5382144 }, "onnx::MatMul_12702": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2242870016, "file_name": "cache/pos_embedprojConv_605.const", "file_size": 10764288 }, "onnx::MatMul_12703": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2253634304, "file_name": "cache/pos_embedprojConv_606.const", "file_size": 10764288 }, "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2264398592, "file_name": "cache/pos_embedprojConv_607.const", "file_size": 2691072 }, "/transformer_blocks.20/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2267089664, "file_name": "cache/pos_embedprojConv_608.const", "file_size": 3072 }, "/transformer_blocks.20/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2267092736, "file_name": "cache/pos_embedprojConv_609.const", "file_size": 3072 }, "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_3_existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2267095808, "file_name": "cache/pos_embedprojConv_610.const", "file_size": 5382144 }, "onnx::MatMul_12704": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2272477952, "file_name": "cache/pos_embedprojConv_611.const", "file_size": 10764288 }, "onnx::MatMul_12705": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2283242240, "file_name": "cache/pos_embedprojConv_612.const", "file_size": 10764288 }, "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2294006528, "file_name": "cache/pos_embedprojConv_613.const", "file_size": 2691072 }, "/transformer_blocks.21/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2296697600, "file_name": "cache/pos_embedprojConv_614.const", "file_size": 3072 }, "/transformer_blocks.21/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2296700672, "file_name": "cache/pos_embedprojConv_615.const", "file_size": 3072 }, "/transformer_blocks.21/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2296703744, "file_name": "cache/pos_embedprojConv_616.const", "file_size": 3072 }, "/transformer_blocks.21/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2296706816, "file_name": "cache/pos_embedprojConv_617.const", "file_size": 3072 }, "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_0_existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2296709888, "file_name": "cache/pos_embedprojConv_618.const", "file_size": 5382144 }, "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_0_existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2302092032, "file_name": "cache/pos_embedprojConv_619.const", "file_size": 5382144 }, "onnx::MatMul_12721_onnx::MatMul_12706": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 2307474176, "file_name": "cache/pos_embedprojConv_620.const", "file_size": 5382400 }, "onnx::MatMul_12722_onnx::MatMul_12707": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 2312856576, "file_name": "cache/pos_embedprojConv_621.const", "file_size": 5382400 }, "onnx::MatMul_12723_onnx::MatMul_12708": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2318238976, "file_name": "cache/pos_embedprojConv_622.const", "file_size": 5382144 }, "onnx::MatMul_12732": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2323621120, "file_name": "cache/pos_embedprojConv_623.const", "file_size": 2691072 }, "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2326312192, "file_name": "cache/pos_embedprojConv_624.const", "file_size": 2691072 }, "/transformer_blocks.21/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2329003264, "file_name": "cache/pos_embedprojConv_625.const", "file_size": 3072 }, "/transformer_blocks.21/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2329006336, "file_name": "cache/pos_embedprojConv_626.const", "file_size": 3072 }, "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_3_existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2329009408, "file_name": "cache/pos_embedprojConv_627.const", "file_size": 5382144 }, "onnx::MatMul_12733": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2334391552, "file_name": "cache/pos_embedprojConv_628.const", "file_size": 10764288 }, "onnx::MatMul_12734": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2345155840, "file_name": "cache/pos_embedprojConv_629.const", "file_size": 10764288 }, "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2355920128, "file_name": "cache/pos_embedprojConv_630.const", "file_size": 2691072 }, "onnx::MatMul_12731": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2358611200, "file_name": "cache/pos_embedprojConv_631.const", "file_size": 2691072 }, "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2361302272, "file_name": "cache/pos_embedprojConv_632.const", "file_size": 2691072 }, "/transformer_blocks.21/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2363993344, "file_name": "cache/pos_embedprojConv_633.const", "file_size": 3072 }, "/transformer_blocks.21/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2363996416, "file_name": "cache/pos_embedprojConv_634.const", "file_size": 3072 }, "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_3_existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2363999488, "file_name": "cache/pos_embedprojConv_635.const", "file_size": 5382144 }, "onnx::MatMul_12735": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2369381632, "file_name": "cache/pos_embedprojConv_636.const", "file_size": 10764288 }, "onnx::MatMul_12736": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2380145920, "file_name": "cache/pos_embedprojConv_637.const", "file_size": 10764288 }, "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2390910208, "file_name": "cache/pos_embedprojConv_638.const", "file_size": 2691072 }, "/transformer_blocks.22/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2393601280, "file_name": "cache/pos_embedprojConv_639.const", "file_size": 3072 }, "/transformer_blocks.22/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2393604352, "file_name": "cache/pos_embedprojConv_640.const", "file_size": 3072 }, "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_0_existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2393607424, "file_name": "cache/pos_embedprojConv_641.const", "file_size": 5382144 }, "/transformer_blocks.22/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2398989568, "file_name": "cache/pos_embedprojConv_642.const", "file_size": 3072 }, "/transformer_blocks.22/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2398992640, "file_name": "cache/pos_embedprojConv_643.const", "file_size": 3072 }, "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_0_existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2398995712, "file_name": "cache/pos_embedprojConv_644.const", "file_size": 5382144 }, "onnx::MatMul_12752_onnx::MatMul_12737": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 2404377856, "file_name": "cache/pos_embedprojConv_645.const", "file_size": 5382400 }, "onnx::MatMul_12753_onnx::MatMul_12738": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 2409760256, "file_name": "cache/pos_embedprojConv_646.const", "file_size": 5382400 }, "onnx::MatMul_12754_onnx::MatMul_12739": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2415142656, "file_name": "cache/pos_embedprojConv_647.const", "file_size": 5382144 }, "onnx::MatMul_12763": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2420524800, "file_name": "cache/pos_embedprojConv_648.const", "file_size": 2691072 }, "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2423215872, "file_name": "cache/pos_embedprojConv_649.const", "file_size": 2691072 }, "/transformer_blocks.22/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2425906944, "file_name": "cache/pos_embedprojConv_650.const", "file_size": 3072 }, "/transformer_blocks.22/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2425910016, "file_name": "cache/pos_embedprojConv_651.const", "file_size": 3072 }, "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_3_existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2425913088, "file_name": "cache/pos_embedprojConv_652.const", "file_size": 5382144 }, "onnx::MatMul_12764": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2431295232, "file_name": "cache/pos_embedprojConv_653.const", "file_size": 10764288 }, "onnx::MatMul_12765": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2442059520, "file_name": "cache/pos_embedprojConv_654.const", "file_size": 10764288 }, "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2452823808, "file_name": "cache/pos_embedprojConv_655.const", "file_size": 2691072 }, "onnx::MatMul_12762": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2455514880, "file_name": "cache/pos_embedprojConv_656.const", "file_size": 2691072 }, "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2458205952, "file_name": "cache/pos_embedprojConv_657.const", "file_size": 2691072 }, "/transformer_blocks.22/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2460897024, "file_name": "cache/pos_embedprojConv_658.const", "file_size": 3072 }, "/transformer_blocks.22/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2460900096, "file_name": "cache/pos_embedprojConv_659.const", "file_size": 3072 }, "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_3_existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2460903168, "file_name": "cache/pos_embedprojConv_660.const", "file_size": 5382144 }, "onnx::MatMul_12766": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2466285312, "file_name": "cache/pos_embedprojConv_661.const", "file_size": 10764288 }, "onnx::MatMul_12767": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2477049600, "file_name": "cache/pos_embedprojConv_662.const", "file_size": 10764288 }, "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2487813888, "file_name": "cache/pos_embedprojConv_663.const", "file_size": 2691072 }, "/transformer_blocks.23/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2490504960, "file_name": "cache/pos_embedprojConv_664.const", "file_size": 3072 }, "/transformer_blocks.23/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2490508032, "file_name": "cache/pos_embedprojConv_665.const", "file_size": 3072 }, "/transformer_blocks.23/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2490511104, "file_name": "cache/pos_embedprojConv_666.const", "file_size": 3072 }, "/transformer_blocks.23/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2490514176, "file_name": "cache/pos_embedprojConv_667.const", "file_size": 3072 }, "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_0_existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2490517248, "file_name": "cache/pos_embedprojConv_668.const", "file_size": 5382144 }, "existing_model.transformer_blocks.23.norm1_context.linear.weight_5_1_51_27_47_0_existing_model.transformer_blocks.23.norm1_context.linear.weight_5_1_51_27_47_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2495899392, "file_name": "cache/pos_embedprojConv_669.const", "file_size": 5382144 }, "onnx::MatMul_12783_onnx::MatMul_12768": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 2501281536, "file_name": "cache/pos_embedprojConv_670.const", "file_size": 5382400 }, "onnx::MatMul_12784_onnx::MatMul_12769": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382400 ], "size_in_bytes": 5382400, "op_tensor_size": 5382400, "offset": 2506663936, "file_name": "cache/pos_embedprojConv_671.const", "file_size": 5382400 }, "onnx::MatMul_12785_onnx::MatMul_12770": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2512046336, "file_name": "cache/pos_embedprojConv_672.const", "file_size": 5382144 }, "onnx::MatMul_12790": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2517428480, "file_name": "cache/pos_embedprojConv_673.const", "file_size": 2691072 }, "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2520119552, "file_name": "cache/pos_embedprojConv_674.const", "file_size": 2691072 }, "/transformer_blocks.23/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2522810624, "file_name": "cache/pos_embedprojConv_675.const", "file_size": 3072 }, "/transformer_blocks.23/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2522813696, "file_name": "cache/pos_embedprojConv_676.const", "file_size": 3072 }, "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_3_existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2522816768, "file_name": "cache/pos_embedprojConv_677.const", "file_size": 5382144 }, "onnx::MatMul_12791": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2528198912, "file_name": "cache/pos_embedprojConv_678.const", "file_size": 10764288 }, "onnx::MatMul_12792": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2538963200, "file_name": "cache/pos_embedprojConv_679.const", "file_size": 10764288 }, "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2549727488, "file_name": "cache/pos_embedprojConv_680.const", "file_size": 2691072 }, "/norm_out/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2552418560, "file_name": "cache/pos_embedprojConv_681.const", "file_size": 3072 }, "/norm_out/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2552421632, "file_name": "cache/pos_embedprojConv_682.const", "file_size": 3072 }, "existing_model.norm_out.linear.weight_5_1_52_27_48_0_existing_model.norm_out.linear.weight_5_1_52_27_48_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2552424704, "file_name": "cache/pos_embedprojConv_683.const", "file_size": 5382144 }, "/norm_out/Add_2_output_0.out0_0_108_bfp.out1_105_bfp.wts": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 64 ], "size_in_bytes": 128, "op_tensor_size": 128, "offset": 2557806848, "file_name": "cache/pos_embedprojConv_684.const", "file_size": 128 }, "onnx::MatMul_12793": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 112128 ], "size_in_bytes": 112128, "op_tensor_size": 112128, "offset": 2557806976, "file_name": "cache/pos_embedprojConv_685.const", "file_size": 112128 } }, "dynamic_shape_subgraph": true, "dynamic_shape_list": [ { "max_length + state_dim1": 1184, "floor(h/2)": 32, "state_dim1+max_length": 1184, "h": 64, "floor(w/2)": 32, "w": 64, "batch_2": 2, "state_dim1": 1024, "state_dim1 + max_length": 1184, "max_length": 160 }, { "max_length + state_dim1": 1696, "floor(h/2)": 32, "state_dim1+max_length": 1696, "h": 64, "floor(w/2)": 48, "w": 96, "batch_2": 2, "state_dim1": 1536, "state_dim1 + max_length": 1696, "max_length": 160 }, { "max_length + state_dim1": 1696, "floor(h/2)": 48, "state_dim1+max_length": 1696, "h": 96, "floor(w/2)": 32, "w": 64, "batch_2": 2, "state_dim1": 1536, "state_dim1 + max_length": 1696, "max_length": 160 }, { "max_length + state_dim1": 2464, "floor(h/2)": 36, "state_dim1+max_length": 2464, "h": 72, "floor(w/2)": 64, "w": 128, "batch_2": 2, "state_dim1": 2304, "state_dim1 + max_length": 2464, "max_length": 160 }, { "max_length + state_dim1": 2464, "floor(h/2)": 64, "state_dim1+max_length": 2464, "h": 128, "floor(w/2)": 36, "w": 72, "batch_2": 2, "state_dim1": 2304, "state_dim1 + max_length": 2464, "max_length": 160 }, { "max_length + state_dim1": 3232, "floor(h/2)": 48, "state_dim1+max_length": 3232, "h": 96, "floor(w/2)": 64, "w": 128, "batch_2": 2, "state_dim1": 3072, "state_dim1 + max_length": 3232, "max_length": 160 }, { "max_length + state_dim1": 3232, "floor(h/2)": 64, "state_dim1+max_length": 3232, "h": 128, "floor(w/2)": 48, "w": 96, "batch_2": 2, "state_dim1": 3072, "state_dim1 + max_length": 3232, "max_length": 160 }, { "max_length + state_dim1": 4256, "floor(h/2)": 64, "state_dim1+max_length": 4256, "h": 128, "floor(w/2)": 64, "w": 128, "batch_2": 2, "state_dim1": 4096, "state_dim1 + max_length": 4256, "max_length": 160 } ], "aux_info": {} }