{ "dd_meta_major_version": 1, "dd_meta_minor_version": 4, "state_table_updates": [], "op_list": [ { "name": "/pos_embed/proj/Conv", "type": "SDConv", "in_args": [ "hidden_states_nhwc.out5_0_0" ], "const_args": [ "existing_model.pos_embed.proj.weight" ], "out_args": [ "/pos_embed/Transpose_output_0.out5_0_0" ], "attrs": { "auto_pad": { "type": "str", "value": [ "NOTSET" ] }, "dilations": { "type": "int", "value": [ "1", "1" ] }, "group": { "type": "int", "value": [ "1" ] }, "kernel_shape": { "type": "int", "value": [ "2", "2" ] }, "pads": { "type": "int", "value": [ "0", "0", "0", "0" ] }, "strides": { "type": "int", "value": [ "2", "2" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "w", "h", "16" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(w/2)", "floor(h/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "2", "2", "16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "float" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/pos_embed/Add_2", "type": "SDAdd", "in_args": [ "/pos_embed/Transpose_output_0.out5_0_0", "/pos_embed/Reshape_1_output_0.out_35_1_2" ], "const_args": [], "out_args": [ "/pos_embed/Add_2_output_0.out_35_1_2" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "1", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/timestep_embedder/linear_1/Gemm", "type": "SDGemm", "in_args": [ "/time_text_embed/Cast_output_0.out17_3_3" ], "const_args": [ "existing_model.time_text_embed.timestep_embedder.linear_1.weight_5_1_0" ], "out_args": [ "/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1", "256" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "256", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/timestep_embedder/act/Sigmoid", "type": "SDSilu", "in_args": [ "/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3" ], "const_args": [ "/time_text_embed/timestep_embedder/act/Sigmoid.weights2_1" ], "out_args": [ "/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "weight_shape": { "type": "int", "value": [ "128" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/timestep_embedder/linear_2/Gemm", "type": "SDGemm", "in_args": [ "/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1" ], "const_args": [ "existing_model.time_text_embed.timestep_embedder.linear_2.weight_5_1_1" ], "out_args": [ "/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/text_embedder/linear_1/Gemm", "type": "SDGemm", "in_args": [ "pooled_projections.out17_3_1" ], "const_args": [ "existing_model.time_text_embed.text_embedder.linear_1.weight_5_1_2" ], "out_args": [ "/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1", "2048" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "2048", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/text_embedder/act_1/Sigmoid", "type": "SDSilu", "in_args": [ "/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1" ], "const_args": [ "/time_text_embed/text_embedder/act_1/Sigmoid.weights2_0" ], "out_args": [ "/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "weight_shape": { "type": "int", "value": [ "128" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/text_embedder/linear_2/Gemm", "type": "SDGemm", "in_args": [ "/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0" ], "const_args": [ "existing_model.time_text_embed.text_embedder.linear_2.weight_5_1_3" ], "out_args": [ "/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/Add", "type": "SDAdd", "in_args": [ "/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4", "/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2" ], "const_args": [], "out_args": [ "/time_text_embed/Add_output_0.out_35_1_3" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1/silu/Sigmoid", "type": "SDSilu", "in_args": [ "/time_text_embed/Add_output_0.out_35_1_3" ], "const_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid.weights2_2" ], "out_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "weight_shape": { "type": "int", "value": [ "128" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "encoder_hidden_states.out17_3_0_SDCastBf2Bfp", "type": "SDCastBf2Bfp", "in_args": [ "encoder_hidden_states.out17_3_0" ], "const_args": [ "encoder_hidden_states.out17_3_0_bfp.wts" ], "out_args": [ "encoder_hidden_states.out17_3_0_bfp.out25_0" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "4096" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "4096" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/context_embedder/MatMul", "type": "SDGemm_bfp", "in_args": [ "encoder_hidden_states.out17_3_0_bfp.out25_0" ], "const_args": [ "onnx::MatMul_7905" ], "out_args": [ "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "4096" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "4096", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_SDCastBfp2Bf", "type": "SDCastBfp2Bf", "in_args": [ "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0" ], "const_args": [ "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_bfp.wts" ], "out_args": [ "/context_embedder/Add_output_0.out17_3_0" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/context_embedder/Add_output_0.out17_3_0" ], "const_args": [ "/transformer_blocks.0/norm1_context/norm/Constant_output_0", "/transformer_blocks.0/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0" ], "const_args": [ "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_0_existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_1" ], "out_args": [ "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/pos_embed/Add_2_output_0.out_35_1_2" ], "const_args": [ "/transformer_blocks.0/norm1/norm/Constant_output_0", "/transformer_blocks.0/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1" ], "const_args": [ "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_0_existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_1" ], "out_args": [ "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_94" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0", "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_94" ], "const_args": [ "onnx::MatMul_7909_onnx::MatMul_7906" ], "out_args": [ "/transformer_blocks.0/attn/Concat_output_0.out22_0_bfp.out23_0" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0", "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_94" ], "const_args": [ "onnx::MatMul_7910_onnx::MatMul_7907" ], "out_args": [ "/transformer_blocks.0/attn/Concat_1_output_0.out22_0_bfp.out23_1" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0", "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_94" ], "const_args": [ "onnx::MatMul_7911_onnx::MatMul_7908" ], "out_args": [ "/transformer_blocks.0/attn/Concat_2_output_0.out22_0_bfp.out23_2" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_0", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.0/attn/Concat_output_0.out22_0_bfp.out23_0", "/transformer_blocks.0/attn/Concat_1_output_0.out22_0_bfp.out23_1", "/transformer_blocks.0/attn/Concat_2_output_0.out22_0_bfp.out23_2" ], "const_args": [], "out_args": [ "/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0" ], "const_args": [ "onnx::MatMul_7926" ], "out_args": [ "/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1", "/context_embedder/Add_output_0.out17_3_0" ], "const_args": [ "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_2_gma" ], "out_args": [ "/transformer_blocks.0/Add_4_output_0.out10_0" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.0/Add_4_output_0.out10_0" ], "const_args": [ "/transformer_blocks.0/norm2_context/Constant_output_0", "/transformer_blocks.0/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3" ], "const_args": [ "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_3_existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_4" ], "out_args": [ "/transformer_blocks.0/Add_6_output_0.out0_0_3_bfp.out1_1" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.0/Add_6_output_0.out0_0_3_bfp.out1_1" ], "const_args": [ "onnx::MatMul_7929" ], "out_args": [ "/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_6" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_6" ], "const_args": [ "onnx::MatMul_7930" ], "out_args": [ "/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_12_bfp.out25_8" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_12_bfp.out25_8", "/transformer_blocks.0/Add_4_output_0.out10_0" ], "const_args": [ "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_5_gma" ], "out_args": [ "/transformer_blocks.0/Add_7_output_0.out10_1" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0" ], "const_args": [ "onnx::MatMul_7925" ], "out_args": [ "/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0", "/pos_embed/Add_2_output_0.out_35_1_2" ], "const_args": [ "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_2_gma" ], "out_args": [ "/transformer_blocks.0/Add_output_0.out10_92" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.0/Add_output_0.out10_92" ], "const_args": [ "/transformer_blocks.0/norm2/Constant_output_0", "/transformer_blocks.0/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2" ], "const_args": [ "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_3_existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_4" ], "out_args": [ "/transformer_blocks.0/Add_2_output_0.out0_0_2_bfp.out1_95" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.0/Add_2_output_0.out0_0_2_bfp.out1_95" ], "const_args": [ "onnx::MatMul_7927" ], "out_args": [ "/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_5" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_5" ], "const_args": [ "onnx::MatMul_7928" ], "out_args": [ "/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_11_bfp.out25_7" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_11_bfp.out25_7", "/transformer_blocks.0/Add_output_0.out10_92" ], "const_args": [ "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_5_gma" ], "out_args": [ "/transformer_blocks.0/Add_3_output_0.out10_93" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.0/Add_7_output_0.out10_1" ], "const_args": [ "/transformer_blocks.1/norm1_context/norm/Constant_output_0", "/transformer_blocks.1/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_4_bfp.out15_4" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add", "type": "SDAdd", "in_args": [ "/transformer_blocks.0/Add_3_output_0.out10_93", "block_controlnet_hidden_states_0.out_35_1_4" ], "const_args": [], "out_args": [ "/Add_output_0.out_35_1_4" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_output_0.out_35_1_4" ], "const_args": [ "/transformer_blocks.1/norm1/norm/Constant_output_0", "/transformer_blocks.1/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_5_bfp.out15_5" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_5_bfp.out15_5" ], "const_args": [ "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_0_existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_1" ], "out_args": [ "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_5_bfp.out1_2" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_4_bfp.out15_4" ], "const_args": [ "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_0_existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_1" ], "out_args": [ "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_4_bfp.out1_4" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_4_bfp.out1_4", "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_5_bfp.out1_2" ], "const_args": [ "onnx::MatMul_7934_onnx::MatMul_7931" ], "out_args": [ "/transformer_blocks.1/attn/Concat_output_0.out22_1_bfp.out23_3" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_4_bfp.out1_4", "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_5_bfp.out1_2" ], "const_args": [ "onnx::MatMul_7935_onnx::MatMul_7932" ], "out_args": [ "/transformer_blocks.1/attn/Concat_1_output_0.out22_1_bfp.out23_4" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_4_bfp.out1_4", "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_5_bfp.out1_2" ], "const_args": [ "onnx::MatMul_7936_onnx::MatMul_7933" ], "out_args": [ "/transformer_blocks.1/attn/Concat_2_output_0.out22_1_bfp.out23_5" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_1", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.1/attn/Concat_output_0.out22_1_bfp.out23_3", "/transformer_blocks.1/attn/Concat_1_output_0.out22_1_bfp.out23_4", "/transformer_blocks.1/attn/Concat_2_output_0.out22_1_bfp.out23_5" ], "const_args": [], "out_args": [ "/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1" ], "const_args": [ "onnx::MatMul_7950" ], "out_args": [ "/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2", "/Add_output_0.out_35_1_4" ], "const_args": [ "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_2_gma" ], "out_args": [ "/transformer_blocks.1/Add_output_0.out10_2" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.1/Add_output_0.out10_2" ], "const_args": [ "/transformer_blocks.1/norm2/Constant_output_0", "/transformer_blocks.1/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6" ], "const_args": [ "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_3_existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_4" ], "out_args": [ "/transformer_blocks.1/Add_2_output_0.out0_0_6_bfp.out1_3" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.1/Add_2_output_0.out0_0_6_bfp.out1_3" ], "const_args": [ "onnx::MatMul_7952" ], "out_args": [ "/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_9" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_9" ], "const_args": [ "onnx::MatMul_7953" ], "out_args": [ "/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_19_bfp.out25_11" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_19_bfp.out25_11", "/transformer_blocks.1/Add_output_0.out10_2" ], "const_args": [ "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_5_gma" ], "out_args": [ "/transformer_blocks.1/Add_3_output_0.out10_3" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_1", "type": "SDAdd", "in_args": [ "/transformer_blocks.1/Add_3_output_0.out10_3", "block_controlnet_hidden_states_0.out_35_1_4" ], "const_args": [], "out_args": [ "/Add_1_output_0.out_35_1_5" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1" ], "const_args": [ "onnx::MatMul_7951" ], "out_args": [ "/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3", "/transformer_blocks.0/Add_7_output_0.out10_1" ], "const_args": [ "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_2_gma" ], "out_args": [ "/transformer_blocks.1/Add_4_output_0.out10_4" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.1/Add_4_output_0.out10_4" ], "const_args": [ "/transformer_blocks.1/norm2_context/Constant_output_0", "/transformer_blocks.1/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7" ], "const_args": [ "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_3_existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_4" ], "out_args": [ "/transformer_blocks.1/Add_6_output_0.out0_0_7_bfp.out1_5" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.1/Add_6_output_0.out0_0_7_bfp.out1_5" ], "const_args": [ "onnx::MatMul_7954" ], "out_args": [ "/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_10" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_10" ], "const_args": [ "onnx::MatMul_7955" ], "out_args": [ "/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_20_bfp.out25_12" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_20_bfp.out25_12", "/transformer_blocks.1/Add_4_output_0.out10_4" ], "const_args": [ "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_5_gma" ], "out_args": [ "/transformer_blocks.1/Add_7_output_0.out10_5" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_1_output_0.out_35_1_5" ], "const_args": [ "/transformer_blocks.2/norm1/norm/Constant_output_0", "/transformer_blocks.2/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_9_bfp.out15_9" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.1/Add_7_output_0.out10_5" ], "const_args": [ "/transformer_blocks.2/norm1_context/norm/Constant_output_0", "/transformer_blocks.2/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_8_bfp.out15_8" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_9_bfp.out15_9" ], "const_args": [ "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_0_existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_1" ], "out_args": [ "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_9_bfp.out1_46" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_8_bfp.out15_8" ], "const_args": [ "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_0_existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_1" ], "out_args": [ "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_8_bfp.out1_48" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_8_bfp.out1_48", "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_9_bfp.out1_46" ], "const_args": [ "onnx::MatMul_7959_onnx::MatMul_7956" ], "out_args": [ "/transformer_blocks.2/attn/Concat_output_0.out22_2_bfp.out23_6" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_8_bfp.out1_48", "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_9_bfp.out1_46" ], "const_args": [ "onnx::MatMul_7960_onnx::MatMul_7957" ], "out_args": [ "/transformer_blocks.2/attn/Concat_1_output_0.out22_2_bfp.out23_7" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_8_bfp.out1_48", "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_9_bfp.out1_46" ], "const_args": [ "onnx::MatMul_7961_onnx::MatMul_7958" ], "out_args": [ "/transformer_blocks.2/attn/Concat_2_output_0.out22_2_bfp.out23_8" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_2", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.2/attn/Concat_output_0.out22_2_bfp.out23_6", "/transformer_blocks.2/attn/Concat_1_output_0.out22_2_bfp.out23_7", "/transformer_blocks.2/attn/Concat_2_output_0.out22_2_bfp.out23_8" ], "const_args": [], "out_args": [ "/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2" ], "const_args": [ "onnx::MatMul_7976" ], "out_args": [ "/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5", "/transformer_blocks.1/Add_7_output_0.out10_5" ], "const_args": [ "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_2_gma" ], "out_args": [ "/transformer_blocks.2/Add_4_output_0.out10_48" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2" ], "const_args": [ "onnx::MatMul_7975" ], "out_args": [ "/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4", "/Add_1_output_0.out_35_1_5" ], "const_args": [ "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_2_gma" ], "out_args": [ "/transformer_blocks.2/Add_output_0.out10_46" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.2/Add_output_0.out10_46" ], "const_args": [ "/transformer_blocks.2/norm2/Constant_output_0", "/transformer_blocks.2/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10" ], "const_args": [ "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_3_existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_4" ], "out_args": [ "/transformer_blocks.2/Add_2_output_0.out0_0_10_bfp.out1_47" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.2/Add_2_output_0.out0_0_10_bfp.out1_47" ], "const_args": [ "onnx::MatMul_7977" ], "out_args": [ "/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_13" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_13" ], "const_args": [ "onnx::MatMul_7978" ], "out_args": [ "/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_27_bfp.out25_15" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.2/Add_4_output_0.out10_48" ], "const_args": [ "/transformer_blocks.2/norm2_context/Constant_output_0", "/transformer_blocks.2/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11" ], "const_args": [ "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_3_existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_4" ], "out_args": [ "/transformer_blocks.2/Add_6_output_0.out0_0_11_bfp.out1_49" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.2/Add_6_output_0.out0_0_11_bfp.out1_49" ], "const_args": [ "onnx::MatMul_7979" ], "out_args": [ "/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_14" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_14" ], "const_args": [ "onnx::MatMul_7980" ], "out_args": [ "/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_28_bfp.out25_16" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_28_bfp.out25_16", "/transformer_blocks.2/Add_4_output_0.out10_48" ], "const_args": [ "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_5_gma" ], "out_args": [ "/transformer_blocks.2/Add_7_output_0.out10_49" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.2/Add_7_output_0.out10_49" ], "const_args": [ "/transformer_blocks.3/norm1_context/norm/Constant_output_0", "/transformer_blocks.3/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_12_bfp.out15_12" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_27_bfp.out25_15", "/transformer_blocks.2/Add_output_0.out10_46" ], "const_args": [ "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_5_gma" ], "out_args": [ "/transformer_blocks.2/Add_3_output_0.out10_47" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_2", "type": "SDAdd", "in_args": [ "/transformer_blocks.2/Add_3_output_0.out10_47", "block_controlnet_hidden_states_1.out_35_1_6" ], "const_args": [], "out_args": [ "/Add_2_output_0.out_35_1_6" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_2_output_0.out_35_1_6" ], "const_args": [ "/transformer_blocks.3/norm1/norm/Constant_output_0", "/transformer_blocks.3/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_13_bfp.out15_13" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_13_bfp.out15_13" ], "const_args": [ "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_0_existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_1" ], "out_args": [ "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_13_bfp.out1_65" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_12_bfp.out15_12" ], "const_args": [ "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_0_existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_1" ], "out_args": [ "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_12_bfp.out1_67" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_12_bfp.out1_67", "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_13_bfp.out1_65" ], "const_args": [ "onnx::MatMul_7984_onnx::MatMul_7981" ], "out_args": [ "/transformer_blocks.3/attn/Concat_output_0.out22_3_bfp.out23_9" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_12_bfp.out1_67", "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_13_bfp.out1_65" ], "const_args": [ "onnx::MatMul_7985_onnx::MatMul_7982" ], "out_args": [ "/transformer_blocks.3/attn/Concat_1_output_0.out22_3_bfp.out23_10" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_12_bfp.out1_67", "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_13_bfp.out1_65" ], "const_args": [ "onnx::MatMul_7986_onnx::MatMul_7983" ], "out_args": [ "/transformer_blocks.3/attn/Concat_2_output_0.out22_3_bfp.out23_11" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_3", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.3/attn/Concat_output_0.out22_3_bfp.out23_9", "/transformer_blocks.3/attn/Concat_1_output_0.out22_3_bfp.out23_10", "/transformer_blocks.3/attn/Concat_2_output_0.out22_3_bfp.out23_11" ], "const_args": [], "out_args": [ "/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3" ], "const_args": [ "onnx::MatMul_8001" ], "out_args": [ "/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7", "/transformer_blocks.2/Add_7_output_0.out10_49" ], "const_args": [ "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_2_gma" ], "out_args": [ "/transformer_blocks.3/Add_4_output_0.out10_66" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3" ], "const_args": [ "onnx::MatMul_8000" ], "out_args": [ "/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6", "/Add_2_output_0.out_35_1_6" ], "const_args": [ "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_2_gma" ], "out_args": [ "/transformer_blocks.3/Add_output_0.out10_64" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.3/Add_output_0.out10_64" ], "const_args": [ "/transformer_blocks.3/norm2/Constant_output_0", "/transformer_blocks.3/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14" ], "const_args": [ "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_3_existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_4" ], "out_args": [ "/transformer_blocks.3/Add_2_output_0.out0_0_14_bfp.out1_66" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.3/Add_2_output_0.out0_0_14_bfp.out1_66" ], "const_args": [ "onnx::MatMul_8002" ], "out_args": [ "/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_17" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_17" ], "const_args": [ "onnx::MatMul_8003" ], "out_args": [ "/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_35_bfp.out25_19" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_35_bfp.out25_19", "/transformer_blocks.3/Add_output_0.out10_64" ], "const_args": [ "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_5_gma" ], "out_args": [ "/transformer_blocks.3/Add_3_output_0.out10_65" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_3", "type": "SDAdd", "in_args": [ "/transformer_blocks.3/Add_3_output_0.out10_65", "block_controlnet_hidden_states_1.out_35_1_6" ], "const_args": [], "out_args": [ "/Add_3_output_0.out_35_1_7" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.3/Add_4_output_0.out10_66" ], "const_args": [ "/transformer_blocks.3/norm2_context/Constant_output_0", "/transformer_blocks.3/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15" ], "const_args": [ "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_3_existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_4" ], "out_args": [ "/transformer_blocks.3/Add_6_output_0.out0_0_15_bfp.out1_68" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.3/Add_6_output_0.out0_0_15_bfp.out1_68" ], "const_args": [ "onnx::MatMul_8004" ], "out_args": [ "/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_18" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_18" ], "const_args": [ "onnx::MatMul_8005" ], "out_args": [ "/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_36_bfp.out25_20" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_36_bfp.out25_20", "/transformer_blocks.3/Add_4_output_0.out10_66" ], "const_args": [ "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_5_gma" ], "out_args": [ "/transformer_blocks.3/Add_7_output_0.out10_67" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_3_output_0.out_35_1_7" ], "const_args": [ "/transformer_blocks.4/norm1/norm/Constant_output_0", "/transformer_blocks.4/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_17_bfp.out15_17" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.3/Add_7_output_0.out10_67" ], "const_args": [ "/transformer_blocks.4/norm1_context/norm/Constant_output_0", "/transformer_blocks.4/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_16_bfp.out15_16" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_17_bfp.out15_17" ], "const_args": [ "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_0_existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_1" ], "out_args": [ "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_17_bfp.out1_69" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_16_bfp.out15_16" ], "const_args": [ "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_0_existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_1" ], "out_args": [ "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_16_bfp.out1_71" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_16_bfp.out1_71", "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_17_bfp.out1_69" ], "const_args": [ "onnx::MatMul_8009_onnx::MatMul_8006" ], "out_args": [ "/transformer_blocks.4/attn/Concat_output_0.out22_4_bfp.out23_12" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_16_bfp.out1_71", "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_17_bfp.out1_69" ], "const_args": [ "onnx::MatMul_8010_onnx::MatMul_8007" ], "out_args": [ "/transformer_blocks.4/attn/Concat_1_output_0.out22_4_bfp.out23_13" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_16_bfp.out1_71", "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_17_bfp.out1_69" ], "const_args": [ "onnx::MatMul_8011_onnx::MatMul_8008" ], "out_args": [ "/transformer_blocks.4/attn/Concat_2_output_0.out22_4_bfp.out23_14" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_4", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.4/attn/Concat_output_0.out22_4_bfp.out23_12", "/transformer_blocks.4/attn/Concat_1_output_0.out22_4_bfp.out23_13", "/transformer_blocks.4/attn/Concat_2_output_0.out22_4_bfp.out23_14" ], "const_args": [], "out_args": [ "/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4" ], "const_args": [ "onnx::MatMul_8026" ], "out_args": [ "/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9", "/transformer_blocks.3/Add_7_output_0.out10_67" ], "const_args": [ "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_2_gma" ], "out_args": [ "/transformer_blocks.4/Add_4_output_0.out10_70" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4" ], "const_args": [ "onnx::MatMul_8025" ], "out_args": [ "/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8", "/Add_3_output_0.out_35_1_7" ], "const_args": [ "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_2_gma" ], "out_args": [ "/transformer_blocks.4/Add_output_0.out10_68" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.4/Add_output_0.out10_68" ], "const_args": [ "/transformer_blocks.4/norm2/Constant_output_0", "/transformer_blocks.4/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18" ], "const_args": [ "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_3_existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_4" ], "out_args": [ "/transformer_blocks.4/Add_2_output_0.out0_0_18_bfp.out1_70" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.4/Add_2_output_0.out0_0_18_bfp.out1_70" ], "const_args": [ "onnx::MatMul_8027" ], "out_args": [ "/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_21" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_21" ], "const_args": [ "onnx::MatMul_8028" ], "out_args": [ "/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_43_bfp.out25_23" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.4/Add_4_output_0.out10_70" ], "const_args": [ "/transformer_blocks.4/norm2_context/Constant_output_0", "/transformer_blocks.4/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19" ], "const_args": [ "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_3_existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_4" ], "out_args": [ "/transformer_blocks.4/Add_6_output_0.out0_0_19_bfp.out1_72" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.4/Add_6_output_0.out0_0_19_bfp.out1_72" ], "const_args": [ "onnx::MatMul_8029" ], "out_args": [ "/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_22" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_22" ], "const_args": [ "onnx::MatMul_8030" ], "out_args": [ "/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_44_bfp.out25_24" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_44_bfp.out25_24", "/transformer_blocks.4/Add_4_output_0.out10_70" ], "const_args": [ "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_5_gma" ], "out_args": [ "/transformer_blocks.4/Add_7_output_0.out10_71" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.4/Add_7_output_0.out10_71" ], "const_args": [ "/transformer_blocks.5/norm1_context/norm/Constant_output_0", "/transformer_blocks.5/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_20_bfp.out15_20" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_43_bfp.out25_23", "/transformer_blocks.4/Add_output_0.out10_68" ], "const_args": [ "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_5_gma" ], "out_args": [ "/transformer_blocks.4/Add_3_output_0.out10_69" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_4", "type": "SDAdd", "in_args": [ "/transformer_blocks.4/Add_3_output_0.out10_69", "block_controlnet_hidden_states_2.out_35_1_8" ], "const_args": [], "out_args": [ "/Add_4_output_0.out_35_1_8" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_4_output_0.out_35_1_8" ], "const_args": [ "/transformer_blocks.5/norm1/norm/Constant_output_0", "/transformer_blocks.5/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_21_bfp.out15_21" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_21_bfp.out15_21" ], "const_args": [ "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_0_existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_1" ], "out_args": [ "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_21_bfp.out1_73" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_20_bfp.out15_20" ], "const_args": [ "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_0_existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_1" ], "out_args": [ "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_20_bfp.out1_75" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_20_bfp.out1_75", "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_21_bfp.out1_73" ], "const_args": [ "onnx::MatMul_8034_onnx::MatMul_8031" ], "out_args": [ "/transformer_blocks.5/attn/Concat_output_0.out22_5_bfp.out23_15" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_20_bfp.out1_75", "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_21_bfp.out1_73" ], "const_args": [ "onnx::MatMul_8035_onnx::MatMul_8032" ], "out_args": [ "/transformer_blocks.5/attn/Concat_1_output_0.out22_5_bfp.out23_16" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_20_bfp.out1_75", "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_21_bfp.out1_73" ], "const_args": [ "onnx::MatMul_8036_onnx::MatMul_8033" ], "out_args": [ "/transformer_blocks.5/attn/Concat_2_output_0.out22_5_bfp.out23_17" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_5", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.5/attn/Concat_output_0.out22_5_bfp.out23_15", "/transformer_blocks.5/attn/Concat_1_output_0.out22_5_bfp.out23_16", "/transformer_blocks.5/attn/Concat_2_output_0.out22_5_bfp.out23_17" ], "const_args": [], "out_args": [ "/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5" ], "const_args": [ "onnx::MatMul_8051" ], "out_args": [ "/transformer_blocks.5/attn/to_add_out/Add_output_0.out6_1_11_bfp.out7_11" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/attn/to_add_out/Add_output_0.out6_1_11_bfp.out7_11", "/transformer_blocks.4/Add_7_output_0.out10_71" ], "const_args": [ "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_2_gma" ], "out_args": [ "/transformer_blocks.5/Add_4_output_0.out10_74" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5" ], "const_args": [ "onnx::MatMul_8050" ], "out_args": [ "/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10", "/Add_4_output_0.out_35_1_8" ], "const_args": [ "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_2_gma" ], "out_args": [ "/transformer_blocks.5/Add_output_0.out10_72" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.5/Add_output_0.out10_72" ], "const_args": [ "/transformer_blocks.5/norm2/Constant_output_0", "/transformer_blocks.5/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22" ], "const_args": [ "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_3_existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_4" ], "out_args": [ "/transformer_blocks.5/Add_2_output_0.out0_0_22_bfp.out1_74" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.5/Add_2_output_0.out0_0_22_bfp.out1_74" ], "const_args": [ "onnx::MatMul_8052" ], "out_args": [ "/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_25" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_25" ], "const_args": [ "onnx::MatMul_8053" ], "out_args": [ "/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_51_bfp.out25_27" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_51_bfp.out25_27", "/transformer_blocks.5/Add_output_0.out10_72" ], "const_args": [ "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_5_gma" ], "out_args": [ "/transformer_blocks.5/Add_3_output_0.out10_73" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_5", "type": "SDAdd", "in_args": [ "/transformer_blocks.5/Add_3_output_0.out10_73", "block_controlnet_hidden_states_2.out_35_1_8" ], "const_args": [], "out_args": [ "/Add_5_output_0.out_35_1_9" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.5/Add_4_output_0.out10_74" ], "const_args": [ "/transformer_blocks.5/norm2_context/Constant_output_0", "/transformer_blocks.5/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.5/norm2_context/LayerNormalization_output_0.out14_23_bfp.out15_23" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/norm2_context/LayerNormalization_output_0.out14_23_bfp.out15_23" ], "const_args": [ "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_3_existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_4" ], "out_args": [ "/transformer_blocks.5/Add_6_output_0.out0_0_23_bfp.out1_76" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.5/Add_6_output_0.out0_0_23_bfp.out1_76" ], "const_args": [ "onnx::MatMul_8054" ], "out_args": [ "/transformer_blocks.5/ff_context/net.0/Mul_5_output_0.out17_2_11_bfp.out25_26" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.5/ff_context/net.0/Mul_5_output_0.out17_2_11_bfp.out25_26" ], "const_args": [ "onnx::MatMul_8055" ], "out_args": [ "/transformer_blocks.5/ff_context/net.2/Add_output_0.out17_3_52_bfp.out25_28" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/ff_context/net.2/Add_output_0.out17_3_52_bfp.out25_28", "/transformer_blocks.5/Add_4_output_0.out10_74" ], "const_args": [ "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_5_gma" ], "out_args": [ "/transformer_blocks.5/Add_7_output_0.out10_75" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_5_output_0.out_35_1_9" ], "const_args": [ "/transformer_blocks.6/norm1/norm/Constant_output_0", "/transformer_blocks.6/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.6/norm1/norm/LayerNormalization_output_0.out14_25_bfp.out15_25" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.5/Add_7_output_0.out10_75" ], "const_args": [ "/transformer_blocks.6/norm1_context/norm/Constant_output_0", "/transformer_blocks.6/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.6/norm1_context/norm/LayerNormalization_output_0.out14_24_bfp.out15_24" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/norm1/norm/LayerNormalization_output_0.out14_25_bfp.out15_25" ], "const_args": [ "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_0_existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_1" ], "out_args": [ "/transformer_blocks.6/norm1/Add_2_output_0.out0_0_25_bfp.out1_77" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/norm1_context/norm/LayerNormalization_output_0.out14_24_bfp.out15_24" ], "const_args": [ "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_0_existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_1" ], "out_args": [ "/transformer_blocks.6/norm1_context/Add_2_output_0.out0_0_24_bfp.out1_79" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.6/norm1_context/Add_2_output_0.out0_0_24_bfp.out1_79", "/transformer_blocks.6/norm1/Add_2_output_0.out0_0_25_bfp.out1_77" ], "const_args": [ "onnx::MatMul_8059_onnx::MatMul_8056" ], "out_args": [ "/transformer_blocks.6/attn/Concat_output_0.out22_6_bfp.out23_18" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.6/norm1_context/Add_2_output_0.out0_0_24_bfp.out1_79", "/transformer_blocks.6/norm1/Add_2_output_0.out0_0_25_bfp.out1_77" ], "const_args": [ "onnx::MatMul_8060_onnx::MatMul_8057" ], "out_args": [ "/transformer_blocks.6/attn/Concat_1_output_0.out22_6_bfp.out23_19" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.6/norm1_context/Add_2_output_0.out0_0_24_bfp.out1_79", "/transformer_blocks.6/norm1/Add_2_output_0.out0_0_25_bfp.out1_77" ], "const_args": [ "onnx::MatMul_8061_onnx::MatMul_8058" ], "out_args": [ "/transformer_blocks.6/attn/Concat_2_output_0.out22_6_bfp.out23_20" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_6", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.6/attn/Concat_output_0.out22_6_bfp.out23_18", "/transformer_blocks.6/attn/Concat_1_output_0.out22_6_bfp.out23_19", "/transformer_blocks.6/attn/Concat_2_output_0.out22_6_bfp.out23_20" ], "const_args": [], "out_args": [ "/transformer_blocks.6/attn/Reshape_3_output_0.out22_6_bfp.out27_0_6" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.6/attn/Reshape_3_output_0.out22_6_bfp.out27_0_6" ], "const_args": [ "onnx::MatMul_8076" ], "out_args": [ "/transformer_blocks.6/attn/to_add_out/Add_output_0.out6_1_13_bfp.out7_13" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/attn/to_add_out/Add_output_0.out6_1_13_bfp.out7_13", "/transformer_blocks.5/Add_7_output_0.out10_75" ], "const_args": [ "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_2_gma" ], "out_args": [ "/transformer_blocks.6/Add_4_output_0.out10_78" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.6/attn/Reshape_3_output_0.out22_6_bfp.out27_0_6" ], "const_args": [ "onnx::MatMul_8075" ], "out_args": [ "/transformer_blocks.6/attn/to_out.0/Add_output_0.out6_1_12_bfp.out7_12" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/attn/to_out.0/Add_output_0.out6_1_12_bfp.out7_12", "/Add_5_output_0.out_35_1_9" ], "const_args": [ "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_2_gma" ], "out_args": [ "/transformer_blocks.6/Add_output_0.out10_76" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.6/Add_output_0.out10_76" ], "const_args": [ "/transformer_blocks.6/norm2/Constant_output_0", "/transformer_blocks.6/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.6/norm2/LayerNormalization_output_0.out14_26_bfp.out15_26" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/norm2/LayerNormalization_output_0.out14_26_bfp.out15_26" ], "const_args": [ "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_3_existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_4" ], "out_args": [ "/transformer_blocks.6/Add_2_output_0.out0_0_26_bfp.out1_78" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.6/Add_2_output_0.out0_0_26_bfp.out1_78" ], "const_args": [ "onnx::MatMul_8077" ], "out_args": [ "/transformer_blocks.6/ff/net.0/Mul_5_output_0.out17_2_12_bfp.out25_29" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.6/ff/net.0/Mul_5_output_0.out17_2_12_bfp.out25_29" ], "const_args": [ "onnx::MatMul_8078" ], "out_args": [ "/transformer_blocks.6/ff/net.2/Add_output_0.out17_3_59_bfp.out25_31" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.6/Add_4_output_0.out10_78" ], "const_args": [ "/transformer_blocks.6/norm2_context/Constant_output_0", "/transformer_blocks.6/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.6/norm2_context/LayerNormalization_output_0.out14_27_bfp.out15_27" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/norm2_context/LayerNormalization_output_0.out14_27_bfp.out15_27" ], "const_args": [ "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_3_existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_4" ], "out_args": [ "/transformer_blocks.6/Add_6_output_0.out0_0_27_bfp.out1_80" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.6/Add_6_output_0.out0_0_27_bfp.out1_80" ], "const_args": [ "onnx::MatMul_8079" ], "out_args": [ "/transformer_blocks.6/ff_context/net.0/Mul_5_output_0.out17_2_13_bfp.out25_30" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.6/ff_context/net.0/Mul_5_output_0.out17_2_13_bfp.out25_30" ], "const_args": [ "onnx::MatMul_8080" ], "out_args": [ "/transformer_blocks.6/ff_context/net.2/Add_output_0.out17_3_60_bfp.out25_32" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/ff_context/net.2/Add_output_0.out17_3_60_bfp.out25_32", "/transformer_blocks.6/Add_4_output_0.out10_78" ], "const_args": [ "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_5_gma" ], "out_args": [ "/transformer_blocks.6/Add_7_output_0.out10_79" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.6/Add_7_output_0.out10_79" ], "const_args": [ "/transformer_blocks.7/norm1_context/norm/Constant_output_0", "/transformer_blocks.7/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.7/norm1_context/norm/LayerNormalization_output_0.out14_28_bfp.out15_28" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/ff/net.2/Add_output_0.out17_3_59_bfp.out25_31", "/transformer_blocks.6/Add_output_0.out10_76" ], "const_args": [ "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_5_gma" ], "out_args": [ "/transformer_blocks.6/Add_3_output_0.out10_77" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_6", "type": "SDAdd", "in_args": [ "/transformer_blocks.6/Add_3_output_0.out10_77", "block_controlnet_hidden_states_3.out_35_1_10" ], "const_args": [], "out_args": [ "/Add_6_output_0.out_35_1_10" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_6_output_0.out_35_1_10" ], "const_args": [ "/transformer_blocks.7/norm1/norm/Constant_output_0", "/transformer_blocks.7/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.7/norm1/norm/LayerNormalization_output_0.out14_29_bfp.out15_29" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/norm1/norm/LayerNormalization_output_0.out14_29_bfp.out15_29" ], "const_args": [ "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_0_existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_1" ], "out_args": [ "/transformer_blocks.7/norm1/Add_2_output_0.out0_0_29_bfp.out1_81" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/norm1_context/norm/LayerNormalization_output_0.out14_28_bfp.out15_28" ], "const_args": [ "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_0_existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_1" ], "out_args": [ "/transformer_blocks.7/norm1_context/Add_2_output_0.out0_0_28_bfp.out1_83" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.7/norm1_context/Add_2_output_0.out0_0_28_bfp.out1_83", "/transformer_blocks.7/norm1/Add_2_output_0.out0_0_29_bfp.out1_81" ], "const_args": [ "onnx::MatMul_8084_onnx::MatMul_8081" ], "out_args": [ "/transformer_blocks.7/attn/Concat_output_0.out22_7_bfp.out23_21" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.7/norm1_context/Add_2_output_0.out0_0_28_bfp.out1_83", "/transformer_blocks.7/norm1/Add_2_output_0.out0_0_29_bfp.out1_81" ], "const_args": [ "onnx::MatMul_8085_onnx::MatMul_8082" ], "out_args": [ "/transformer_blocks.7/attn/Concat_1_output_0.out22_7_bfp.out23_22" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.7/norm1_context/Add_2_output_0.out0_0_28_bfp.out1_83", "/transformer_blocks.7/norm1/Add_2_output_0.out0_0_29_bfp.out1_81" ], "const_args": [ "onnx::MatMul_8086_onnx::MatMul_8083" ], "out_args": [ "/transformer_blocks.7/attn/Concat_2_output_0.out22_7_bfp.out23_23" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_7", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.7/attn/Concat_output_0.out22_7_bfp.out23_21", "/transformer_blocks.7/attn/Concat_1_output_0.out22_7_bfp.out23_22", "/transformer_blocks.7/attn/Concat_2_output_0.out22_7_bfp.out23_23" ], "const_args": [], "out_args": [ "/transformer_blocks.7/attn/Reshape_3_output_0.out22_7_bfp.out27_0_7" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.7/attn/Reshape_3_output_0.out22_7_bfp.out27_0_7" ], "const_args": [ "onnx::MatMul_8101" ], "out_args": [ "/transformer_blocks.7/attn/to_add_out/Add_output_0.out6_1_15_bfp.out7_15" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/attn/to_add_out/Add_output_0.out6_1_15_bfp.out7_15", "/transformer_blocks.6/Add_7_output_0.out10_79" ], "const_args": [ "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_2_gma" ], "out_args": [ "/transformer_blocks.7/Add_4_output_0.out10_82" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.7/attn/Reshape_3_output_0.out22_7_bfp.out27_0_7" ], "const_args": [ "onnx::MatMul_8100" ], "out_args": [ "/transformer_blocks.7/attn/to_out.0/Add_output_0.out6_1_14_bfp.out7_14" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/attn/to_out.0/Add_output_0.out6_1_14_bfp.out7_14", "/Add_6_output_0.out_35_1_10" ], "const_args": [ "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_2_gma" ], "out_args": [ "/transformer_blocks.7/Add_output_0.out10_80" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.7/Add_output_0.out10_80" ], "const_args": [ "/transformer_blocks.7/norm2/Constant_output_0", "/transformer_blocks.7/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.7/norm2/LayerNormalization_output_0.out14_30_bfp.out15_30" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/norm2/LayerNormalization_output_0.out14_30_bfp.out15_30" ], "const_args": [ "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_3_existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_4" ], "out_args": [ "/transformer_blocks.7/Add_2_output_0.out0_0_30_bfp.out1_82" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.7/Add_2_output_0.out0_0_30_bfp.out1_82" ], "const_args": [ "onnx::MatMul_8102" ], "out_args": [ "/transformer_blocks.7/ff/net.0/Mul_5_output_0.out17_2_14_bfp.out25_33" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.7/ff/net.0/Mul_5_output_0.out17_2_14_bfp.out25_33" ], "const_args": [ "onnx::MatMul_8103" ], "out_args": [ "/transformer_blocks.7/ff/net.2/Add_output_0.out17_3_67_bfp.out25_35" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/ff/net.2/Add_output_0.out17_3_67_bfp.out25_35", "/transformer_blocks.7/Add_output_0.out10_80" ], "const_args": [ "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_5_gma" ], "out_args": [ "/transformer_blocks.7/Add_3_output_0.out10_81" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_7", "type": "SDAdd", "in_args": [ "/transformer_blocks.7/Add_3_output_0.out10_81", "block_controlnet_hidden_states_3.out_35_1_10" ], "const_args": [], "out_args": [ "/Add_7_output_0.out_35_1_11" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.7/Add_4_output_0.out10_82" ], "const_args": [ "/transformer_blocks.7/norm2_context/Constant_output_0", "/transformer_blocks.7/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.7/norm2_context/LayerNormalization_output_0.out14_31_bfp.out15_31" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/norm2_context/LayerNormalization_output_0.out14_31_bfp.out15_31" ], "const_args": [ "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_3_existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_4" ], "out_args": [ "/transformer_blocks.7/Add_6_output_0.out0_0_31_bfp.out1_84" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.7/Add_6_output_0.out0_0_31_bfp.out1_84" ], "const_args": [ "onnx::MatMul_8104" ], "out_args": [ "/transformer_blocks.7/ff_context/net.0/Mul_5_output_0.out17_2_15_bfp.out25_34" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.7/ff_context/net.0/Mul_5_output_0.out17_2_15_bfp.out25_34" ], "const_args": [ "onnx::MatMul_8105" ], "out_args": [ "/transformer_blocks.7/ff_context/net.2/Add_output_0.out17_3_68_bfp.out25_36" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/ff_context/net.2/Add_output_0.out17_3_68_bfp.out25_36", "/transformer_blocks.7/Add_4_output_0.out10_82" ], "const_args": [ "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_5_gma" ], "out_args": [ "/transformer_blocks.7/Add_7_output_0.out10_83" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_7_output_0.out_35_1_11" ], "const_args": [ "/transformer_blocks.8/norm1/norm/Constant_output_0", "/transformer_blocks.8/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.8/norm1/norm/LayerNormalization_output_0.out14_33_bfp.out15_33" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.7/Add_7_output_0.out10_83" ], "const_args": [ "/transformer_blocks.8/norm1_context/norm/Constant_output_0", "/transformer_blocks.8/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.8/norm1_context/norm/LayerNormalization_output_0.out14_32_bfp.out15_32" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/norm1/norm/LayerNormalization_output_0.out14_33_bfp.out15_33" ], "const_args": [ "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_0_existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_1" ], "out_args": [ "/transformer_blocks.8/norm1/Add_2_output_0.out0_0_33_bfp.out1_85" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/norm1_context/norm/LayerNormalization_output_0.out14_32_bfp.out15_32" ], "const_args": [ "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_0_existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_1" ], "out_args": [ "/transformer_blocks.8/norm1_context/Add_2_output_0.out0_0_32_bfp.out1_87" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.8/norm1_context/Add_2_output_0.out0_0_32_bfp.out1_87", "/transformer_blocks.8/norm1/Add_2_output_0.out0_0_33_bfp.out1_85" ], "const_args": [ "onnx::MatMul_8109_onnx::MatMul_8106" ], "out_args": [ "/transformer_blocks.8/attn/Concat_output_0.out22_8_bfp.out23_24" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.8/norm1_context/Add_2_output_0.out0_0_32_bfp.out1_87", "/transformer_blocks.8/norm1/Add_2_output_0.out0_0_33_bfp.out1_85" ], "const_args": [ "onnx::MatMul_8110_onnx::MatMul_8107" ], "out_args": [ "/transformer_blocks.8/attn/Concat_1_output_0.out22_8_bfp.out23_25" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.8/norm1_context/Add_2_output_0.out0_0_32_bfp.out1_87", "/transformer_blocks.8/norm1/Add_2_output_0.out0_0_33_bfp.out1_85" ], "const_args": [ "onnx::MatMul_8111_onnx::MatMul_8108" ], "out_args": [ "/transformer_blocks.8/attn/Concat_2_output_0.out22_8_bfp.out23_26" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_8", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.8/attn/Concat_output_0.out22_8_bfp.out23_24", "/transformer_blocks.8/attn/Concat_1_output_0.out22_8_bfp.out23_25", "/transformer_blocks.8/attn/Concat_2_output_0.out22_8_bfp.out23_26" ], "const_args": [], "out_args": [ "/transformer_blocks.8/attn/Reshape_3_output_0.out22_8_bfp.out27_0_8" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.8/attn/Reshape_3_output_0.out22_8_bfp.out27_0_8" ], "const_args": [ "onnx::MatMul_8126" ], "out_args": [ "/transformer_blocks.8/attn/to_add_out/Add_output_0.out6_1_17_bfp.out7_17" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/attn/to_add_out/Add_output_0.out6_1_17_bfp.out7_17", "/transformer_blocks.7/Add_7_output_0.out10_83" ], "const_args": [ "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_2_gma" ], "out_args": [ "/transformer_blocks.8/Add_4_output_0.out10_86" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.8/attn/Reshape_3_output_0.out22_8_bfp.out27_0_8" ], "const_args": [ "onnx::MatMul_8125" ], "out_args": [ "/transformer_blocks.8/attn/to_out.0/Add_output_0.out6_1_16_bfp.out7_16" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/attn/to_out.0/Add_output_0.out6_1_16_bfp.out7_16", "/Add_7_output_0.out_35_1_11" ], "const_args": [ "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_2_gma" ], "out_args": [ "/transformer_blocks.8/Add_output_0.out10_84" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.8/Add_output_0.out10_84" ], "const_args": [ "/transformer_blocks.8/norm2/Constant_output_0", "/transformer_blocks.8/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.8/norm2/LayerNormalization_output_0.out14_34_bfp.out15_34" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/norm2/LayerNormalization_output_0.out14_34_bfp.out15_34" ], "const_args": [ "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_3_existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_4" ], "out_args": [ "/transformer_blocks.8/Add_2_output_0.out0_0_34_bfp.out1_86" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.8/Add_2_output_0.out0_0_34_bfp.out1_86" ], "const_args": [ "onnx::MatMul_8127" ], "out_args": [ "/transformer_blocks.8/ff/net.0/Mul_5_output_0.out17_2_16_bfp.out25_37" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.8/ff/net.0/Mul_5_output_0.out17_2_16_bfp.out25_37" ], "const_args": [ "onnx::MatMul_8128" ], "out_args": [ "/transformer_blocks.8/ff/net.2/Add_output_0.out17_3_75_bfp.out25_39" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.8/Add_4_output_0.out10_86" ], "const_args": [ "/transformer_blocks.8/norm2_context/Constant_output_0", "/transformer_blocks.8/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.8/norm2_context/LayerNormalization_output_0.out14_35_bfp.out15_35" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/norm2_context/LayerNormalization_output_0.out14_35_bfp.out15_35" ], "const_args": [ "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_3_existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_4" ], "out_args": [ "/transformer_blocks.8/Add_6_output_0.out0_0_35_bfp.out1_88" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.8/Add_6_output_0.out0_0_35_bfp.out1_88" ], "const_args": [ "onnx::MatMul_8129" ], "out_args": [ "/transformer_blocks.8/ff_context/net.0/Mul_5_output_0.out17_2_17_bfp.out25_38" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.8/ff_context/net.0/Mul_5_output_0.out17_2_17_bfp.out25_38" ], "const_args": [ "onnx::MatMul_8130" ], "out_args": [ "/transformer_blocks.8/ff_context/net.2/Add_output_0.out17_3_76_bfp.out25_40" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/ff_context/net.2/Add_output_0.out17_3_76_bfp.out25_40", "/transformer_blocks.8/Add_4_output_0.out10_86" ], "const_args": [ "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_5_gma" ], "out_args": [ "/transformer_blocks.8/Add_7_output_0.out10_87" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.8/Add_7_output_0.out10_87" ], "const_args": [ "/transformer_blocks.9/norm1_context/norm/Constant_output_0", "/transformer_blocks.9/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.9/norm1_context/norm/LayerNormalization_output_0.out14_36_bfp.out15_36" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/ff/net.2/Add_output_0.out17_3_75_bfp.out25_39", "/transformer_blocks.8/Add_output_0.out10_84" ], "const_args": [ "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_5_gma" ], "out_args": [ "/transformer_blocks.8/Add_3_output_0.out10_85" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_8", "type": "SDAdd", "in_args": [ "/transformer_blocks.8/Add_3_output_0.out10_85", "block_controlnet_hidden_states_4.out_35_1_12" ], "const_args": [], "out_args": [ "/Add_8_output_0.out_35_1_12" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_8_output_0.out_35_1_12" ], "const_args": [ "/transformer_blocks.9/norm1/norm/Constant_output_0", "/transformer_blocks.9/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.9/norm1/norm/LayerNormalization_output_0.out14_37_bfp.out15_37" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/norm1/norm/LayerNormalization_output_0.out14_37_bfp.out15_37" ], "const_args": [ "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_0_existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_1" ], "out_args": [ "/transformer_blocks.9/norm1/Add_2_output_0.out0_0_37_bfp.out1_89" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/norm1_context/norm/LayerNormalization_output_0.out14_36_bfp.out15_36" ], "const_args": [ "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_0_existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_1" ], "out_args": [ "/transformer_blocks.9/norm1_context/Add_2_output_0.out0_0_36_bfp.out1_91" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.9/norm1_context/Add_2_output_0.out0_0_36_bfp.out1_91", "/transformer_blocks.9/norm1/Add_2_output_0.out0_0_37_bfp.out1_89" ], "const_args": [ "onnx::MatMul_8134_onnx::MatMul_8131" ], "out_args": [ "/transformer_blocks.9/attn/Concat_output_0.out22_9_bfp.out23_27" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.9/norm1_context/Add_2_output_0.out0_0_36_bfp.out1_91", "/transformer_blocks.9/norm1/Add_2_output_0.out0_0_37_bfp.out1_89" ], "const_args": [ "onnx::MatMul_8135_onnx::MatMul_8132" ], "out_args": [ "/transformer_blocks.9/attn/Concat_1_output_0.out22_9_bfp.out23_28" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.9/norm1_context/Add_2_output_0.out0_0_36_bfp.out1_91", "/transformer_blocks.9/norm1/Add_2_output_0.out0_0_37_bfp.out1_89" ], "const_args": [ "onnx::MatMul_8136_onnx::MatMul_8133" ], "out_args": [ "/transformer_blocks.9/attn/Concat_2_output_0.out22_9_bfp.out23_29" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_9", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.9/attn/Concat_output_0.out22_9_bfp.out23_27", "/transformer_blocks.9/attn/Concat_1_output_0.out22_9_bfp.out23_28", "/transformer_blocks.9/attn/Concat_2_output_0.out22_9_bfp.out23_29" ], "const_args": [], "out_args": [ "/transformer_blocks.9/attn/Reshape_3_output_0.out22_9_bfp.out27_0_9" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.9/attn/Reshape_3_output_0.out22_9_bfp.out27_0_9" ], "const_args": [ "onnx::MatMul_8151" ], "out_args": [ "/transformer_blocks.9/attn/to_add_out/Add_output_0.out6_1_19_bfp.out7_19" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/attn/to_add_out/Add_output_0.out6_1_19_bfp.out7_19", "/transformer_blocks.8/Add_7_output_0.out10_87" ], "const_args": [ "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_2_gma" ], "out_args": [ "/transformer_blocks.9/Add_4_output_0.out10_90" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.9/attn/Reshape_3_output_0.out22_9_bfp.out27_0_9" ], "const_args": [ "onnx::MatMul_8150" ], "out_args": [ "/transformer_blocks.9/attn/to_out.0/Add_output_0.out6_1_18_bfp.out7_18" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/attn/to_out.0/Add_output_0.out6_1_18_bfp.out7_18", "/Add_8_output_0.out_35_1_12" ], "const_args": [ "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_2_gma" ], "out_args": [ "/transformer_blocks.9/Add_output_0.out10_88" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.9/Add_output_0.out10_88" ], "const_args": [ "/transformer_blocks.9/norm2/Constant_output_0", "/transformer_blocks.9/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.9/norm2/LayerNormalization_output_0.out14_38_bfp.out15_38" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/norm2/LayerNormalization_output_0.out14_38_bfp.out15_38" ], "const_args": [ "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_3_existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_4" ], "out_args": [ "/transformer_blocks.9/Add_2_output_0.out0_0_38_bfp.out1_90" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.9/Add_2_output_0.out0_0_38_bfp.out1_90" ], "const_args": [ "onnx::MatMul_8152" ], "out_args": [ "/transformer_blocks.9/ff/net.0/Mul_5_output_0.out17_2_18_bfp.out25_41" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.9/ff/net.0/Mul_5_output_0.out17_2_18_bfp.out25_41" ], "const_args": [ "onnx::MatMul_8153" ], "out_args": [ "/transformer_blocks.9/ff/net.2/Add_output_0.out17_3_83_bfp.out25_43" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/ff/net.2/Add_output_0.out17_3_83_bfp.out25_43", "/transformer_blocks.9/Add_output_0.out10_88" ], "const_args": [ "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_5_gma" ], "out_args": [ "/transformer_blocks.9/Add_3_output_0.out10_89" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_9", "type": "SDAdd", "in_args": [ "/transformer_blocks.9/Add_3_output_0.out10_89", "block_controlnet_hidden_states_4.out_35_1_12" ], "const_args": [], "out_args": [ "/Add_9_output_0.out_35_1_13" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_9_output_0.out_35_1_13" ], "const_args": [ "/transformer_blocks.10/norm1/norm/Constant_output_0", "/transformer_blocks.10/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.10/norm1/norm/LayerNormalization_output_0.out14_40_bfp.out15_40" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/norm1/norm/LayerNormalization_output_0.out14_40_bfp.out15_40" ], "const_args": [ "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_0_existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_1" ], "out_args": [ "/transformer_blocks.10/norm1/Add_2_output_0.out0_0_40_bfp.out1_6" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.9/Add_4_output_0.out10_90" ], "const_args": [ "/transformer_blocks.9/norm2_context/Constant_output_0", "/transformer_blocks.9/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.9/norm2_context/LayerNormalization_output_0.out14_39_bfp.out15_39" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/norm2_context/LayerNormalization_output_0.out14_39_bfp.out15_39" ], "const_args": [ "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_3_existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_4" ], "out_args": [ "/transformer_blocks.9/Add_6_output_0.out0_0_39_bfp.out1_92" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.9/Add_6_output_0.out0_0_39_bfp.out1_92" ], "const_args": [ "onnx::MatMul_8154" ], "out_args": [ "/transformer_blocks.9/ff_context/net.0/Mul_5_output_0.out17_2_19_bfp.out25_42" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.9/ff_context/net.0/Mul_5_output_0.out17_2_19_bfp.out25_42" ], "const_args": [ "onnx::MatMul_8155" ], "out_args": [ "/transformer_blocks.9/ff_context/net.2/Add_output_0.out17_3_84_bfp.out25_44" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/ff_context/net.2/Add_output_0.out17_3_84_bfp.out25_44", "/transformer_blocks.9/Add_4_output_0.out10_90" ], "const_args": [ "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_5_gma" ], "out_args": [ "/transformer_blocks.9/Add_7_output_0.out10_91" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.9/Add_7_output_0.out10_91" ], "const_args": [ "/transformer_blocks.10/norm1_context/norm/Constant_output_0", "/transformer_blocks.10/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.10/norm1_context/norm/LayerNormalization_output_0.out14_41_bfp.out15_41" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/norm1_context/norm/LayerNormalization_output_0.out14_41_bfp.out15_41" ], "const_args": [ "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_0_existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_1" ], "out_args": [ "/transformer_blocks.10/norm1_context/Add_2_output_0.out0_0_41_bfp.out1_8" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.10/norm1_context/Add_2_output_0.out0_0_41_bfp.out1_8", "/transformer_blocks.10/norm1/Add_2_output_0.out0_0_40_bfp.out1_6" ], "const_args": [ "onnx::MatMul_8159_onnx::MatMul_8156" ], "out_args": [ "/transformer_blocks.10/attn/Concat_output_0.out22_10_bfp.out23_30" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.10/norm1_context/Add_2_output_0.out0_0_41_bfp.out1_8", "/transformer_blocks.10/norm1/Add_2_output_0.out0_0_40_bfp.out1_6" ], "const_args": [ "onnx::MatMul_8160_onnx::MatMul_8157" ], "out_args": [ "/transformer_blocks.10/attn/Concat_1_output_0.out22_10_bfp.out23_31" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.10/norm1_context/Add_2_output_0.out0_0_41_bfp.out1_8", "/transformer_blocks.10/norm1/Add_2_output_0.out0_0_40_bfp.out1_6" ], "const_args": [ "onnx::MatMul_8161_onnx::MatMul_8158" ], "out_args": [ "/transformer_blocks.10/attn/Concat_2_output_0.out22_10_bfp.out23_32" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_10", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.10/attn/Concat_output_0.out22_10_bfp.out23_30", "/transformer_blocks.10/attn/Concat_1_output_0.out22_10_bfp.out23_31", "/transformer_blocks.10/attn/Concat_2_output_0.out22_10_bfp.out23_32" ], "const_args": [], "out_args": [ "/transformer_blocks.10/attn/Reshape_3_output_0.out22_10_bfp.out27_0_10" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.10/attn/Reshape_3_output_0.out22_10_bfp.out27_0_10" ], "const_args": [ "onnx::MatMul_8175" ], "out_args": [ "/transformer_blocks.10/attn/to_out.0/Add_output_0.out6_1_20_bfp.out7_20" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/attn/to_out.0/Add_output_0.out6_1_20_bfp.out7_20", "/Add_9_output_0.out_35_1_13" ], "const_args": [ "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_2_gma" ], "out_args": [ "/transformer_blocks.10/Add_output_0.out10_6" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.10/Add_output_0.out10_6" ], "const_args": [ "/transformer_blocks.10/norm2/Constant_output_0", "/transformer_blocks.10/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.10/norm2/LayerNormalization_output_0.out14_42_bfp.out15_42" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/norm2/LayerNormalization_output_0.out14_42_bfp.out15_42" ], "const_args": [ "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_3_existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_4" ], "out_args": [ "/transformer_blocks.10/Add_2_output_0.out0_0_42_bfp.out1_7" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.10/Add_2_output_0.out0_0_42_bfp.out1_7" ], "const_args": [ "onnx::MatMul_8177" ], "out_args": [ "/transformer_blocks.10/ff/net.0/Mul_5_output_0.out17_2_20_bfp.out25_45" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.10/ff/net.0/Mul_5_output_0.out17_2_20_bfp.out25_45" ], "const_args": [ "onnx::MatMul_8178" ], "out_args": [ "/transformer_blocks.10/ff/net.2/Add_output_0.out17_3_91_bfp.out25_47" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/ff/net.2/Add_output_0.out17_3_91_bfp.out25_47", "/transformer_blocks.10/Add_output_0.out10_6" ], "const_args": [ "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_5_gma" ], "out_args": [ "/transformer_blocks.10/Add_3_output_0.out10_7" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.10/attn/Reshape_3_output_0.out22_10_bfp.out27_0_10" ], "const_args": [ "onnx::MatMul_8176" ], "out_args": [ "/transformer_blocks.10/attn/to_add_out/Add_output_0.out6_1_21_bfp.out7_21" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/attn/to_add_out/Add_output_0.out6_1_21_bfp.out7_21", "/transformer_blocks.9/Add_7_output_0.out10_91" ], "const_args": [ "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_2_gma" ], "out_args": [ "/transformer_blocks.10/Add_4_output_0.out10_8" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.10/Add_4_output_0.out10_8" ], "const_args": [ "/transformer_blocks.10/norm2_context/Constant_output_0", "/transformer_blocks.10/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.10/norm2_context/LayerNormalization_output_0.out14_43_bfp.out15_43" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/norm2_context/LayerNormalization_output_0.out14_43_bfp.out15_43" ], "const_args": [ "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_3_existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_4" ], "out_args": [ "/transformer_blocks.10/Add_6_output_0.out0_0_43_bfp.out1_9" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.10/Add_6_output_0.out0_0_43_bfp.out1_9" ], "const_args": [ "onnx::MatMul_8179" ], "out_args": [ "/transformer_blocks.10/ff_context/net.0/Mul_5_output_0.out17_2_21_bfp.out25_46" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.10/ff_context/net.0/Mul_5_output_0.out17_2_21_bfp.out25_46" ], "const_args": [ "onnx::MatMul_8180" ], "out_args": [ "/transformer_blocks.10/ff_context/net.2/Add_output_0.out17_3_92_bfp.out25_48" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/ff_context/net.2/Add_output_0.out17_3_92_bfp.out25_48", "/transformer_blocks.10/Add_4_output_0.out10_8" ], "const_args": [ "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_5_gma" ], "out_args": [ "/transformer_blocks.10/Add_7_output_0.out10_9" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.10/Add_7_output_0.out10_9" ], "const_args": [ "/transformer_blocks.11/norm1_context/norm/Constant_output_0", "/transformer_blocks.11/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.11/norm1_context/norm/LayerNormalization_output_0.out14_44_bfp.out15_44" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_10", "type": "SDAdd", "in_args": [ "/transformer_blocks.10/Add_3_output_0.out10_7", "block_controlnet_hidden_states_5.out_35_1_14" ], "const_args": [], "out_args": [ "/Add_10_output_0.out_35_1_14" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_10_output_0.out_35_1_14" ], "const_args": [ "/transformer_blocks.11/norm1/norm/Constant_output_0", "/transformer_blocks.11/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.11/norm1/norm/LayerNormalization_output_0.out14_45_bfp.out15_45" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/norm1/norm/LayerNormalization_output_0.out14_45_bfp.out15_45" ], "const_args": [ "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_0_existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_1" ], "out_args": [ "/transformer_blocks.11/norm1/Add_2_output_0.out0_0_45_bfp.out1_10" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/norm1_context/norm/LayerNormalization_output_0.out14_44_bfp.out15_44" ], "const_args": [ "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_0_existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_1" ], "out_args": [ "/transformer_blocks.11/norm1_context/Add_2_output_0.out0_0_44_bfp.out1_12" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.11/norm1_context/Add_2_output_0.out0_0_44_bfp.out1_12", "/transformer_blocks.11/norm1/Add_2_output_0.out0_0_45_bfp.out1_10" ], "const_args": [ "onnx::MatMul_8184_onnx::MatMul_8181" ], "out_args": [ "/transformer_blocks.11/attn/Concat_output_0.out22_11_bfp.out23_33" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.11/norm1_context/Add_2_output_0.out0_0_44_bfp.out1_12", "/transformer_blocks.11/norm1/Add_2_output_0.out0_0_45_bfp.out1_10" ], "const_args": [ "onnx::MatMul_8185_onnx::MatMul_8182" ], "out_args": [ "/transformer_blocks.11/attn/Concat_1_output_0.out22_11_bfp.out23_34" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.11/norm1_context/Add_2_output_0.out0_0_44_bfp.out1_12", "/transformer_blocks.11/norm1/Add_2_output_0.out0_0_45_bfp.out1_10" ], "const_args": [ "onnx::MatMul_8186_onnx::MatMul_8183" ], "out_args": [ "/transformer_blocks.11/attn/Concat_2_output_0.out22_11_bfp.out23_35" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_11", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.11/attn/Concat_output_0.out22_11_bfp.out23_33", "/transformer_blocks.11/attn/Concat_1_output_0.out22_11_bfp.out23_34", "/transformer_blocks.11/attn/Concat_2_output_0.out22_11_bfp.out23_35" ], "const_args": [], "out_args": [ "/transformer_blocks.11/attn/Reshape_3_output_0.out22_11_bfp.out27_0_11" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.11/attn/Reshape_3_output_0.out22_11_bfp.out27_0_11" ], "const_args": [ "onnx::MatMul_8200" ], "out_args": [ "/transformer_blocks.11/attn/to_out.0/Add_output_0.out6_1_22_bfp.out7_22" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/attn/to_out.0/Add_output_0.out6_1_22_bfp.out7_22", "/Add_10_output_0.out_35_1_14" ], "const_args": [ "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_2_gma" ], "out_args": [ "/transformer_blocks.11/Add_output_0.out10_10" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.11/Add_output_0.out10_10" ], "const_args": [ "/transformer_blocks.11/norm2/Constant_output_0", "/transformer_blocks.11/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.11/norm2/LayerNormalization_output_0.out14_46_bfp.out15_46" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/norm2/LayerNormalization_output_0.out14_46_bfp.out15_46" ], "const_args": [ "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_3_existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_4" ], "out_args": [ "/transformer_blocks.11/Add_2_output_0.out0_0_46_bfp.out1_11" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.11/Add_2_output_0.out0_0_46_bfp.out1_11" ], "const_args": [ "onnx::MatMul_8202" ], "out_args": [ "/transformer_blocks.11/ff/net.0/Mul_5_output_0.out17_2_22_bfp.out25_49" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.11/ff/net.0/Mul_5_output_0.out17_2_22_bfp.out25_49" ], "const_args": [ "onnx::MatMul_8203" ], "out_args": [ "/transformer_blocks.11/ff/net.2/Add_output_0.out17_3_99_bfp.out25_51" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/ff/net.2/Add_output_0.out17_3_99_bfp.out25_51", "/transformer_blocks.11/Add_output_0.out10_10" ], "const_args": [ "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_5_gma" ], "out_args": [ "/transformer_blocks.11/Add_3_output_0.out10_11" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_11", "type": "SDAdd", "in_args": [ "/transformer_blocks.11/Add_3_output_0.out10_11", "block_controlnet_hidden_states_5.out_35_1_14" ], "const_args": [], "out_args": [ "/Add_11_output_0.out_35_1_15" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.11/attn/Reshape_3_output_0.out22_11_bfp.out27_0_11" ], "const_args": [ "onnx::MatMul_8201" ], "out_args": [ "/transformer_blocks.11/attn/to_add_out/Add_output_0.out6_1_23_bfp.out7_23" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/attn/to_add_out/Add_output_0.out6_1_23_bfp.out7_23", "/transformer_blocks.10/Add_7_output_0.out10_9" ], "const_args": [ "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_2_gma" ], "out_args": [ "/transformer_blocks.11/Add_4_output_0.out10_12" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.11/Add_4_output_0.out10_12" ], "const_args": [ "/transformer_blocks.11/norm2_context/Constant_output_0", "/transformer_blocks.11/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.11/norm2_context/LayerNormalization_output_0.out14_47_bfp.out15_47" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/norm2_context/LayerNormalization_output_0.out14_47_bfp.out15_47" ], "const_args": [ "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_3_existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_4" ], "out_args": [ "/transformer_blocks.11/Add_6_output_0.out0_0_47_bfp.out1_13" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.11/Add_6_output_0.out0_0_47_bfp.out1_13" ], "const_args": [ "onnx::MatMul_8204" ], "out_args": [ "/transformer_blocks.11/ff_context/net.0/Mul_5_output_0.out17_2_23_bfp.out25_50" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.11/ff_context/net.0/Mul_5_output_0.out17_2_23_bfp.out25_50" ], "const_args": [ "onnx::MatMul_8205" ], "out_args": [ "/transformer_blocks.11/ff_context/net.2/Add_output_0.out17_3_100_bfp.out25_52" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/ff_context/net.2/Add_output_0.out17_3_100_bfp.out25_52", "/transformer_blocks.11/Add_4_output_0.out10_12" ], "const_args": [ "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_5_gma" ], "out_args": [ "/transformer_blocks.11/Add_7_output_0.out10_13" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_11_output_0.out_35_1_15" ], "const_args": [ "/transformer_blocks.12/norm1/norm/Constant_output_0", "/transformer_blocks.12/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.12/norm1/norm/LayerNormalization_output_0.out14_49_bfp.out15_49" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.12/norm1/norm/LayerNormalization_output_0.out14_49_bfp.out15_49" ], "const_args": [ "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_0_existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_1" ], "out_args": [ "/transformer_blocks.12/norm1/Add_2_output_0.out0_0_49_bfp.out1_14" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.11/Add_7_output_0.out10_13" ], "const_args": [ "/transformer_blocks.12/norm1_context/norm/Constant_output_0", "/transformer_blocks.12/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.12/norm1_context/norm/LayerNormalization_output_0.out14_48_bfp.out15_48" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.12/norm1_context/norm/LayerNormalization_output_0.out14_48_bfp.out15_48" ], "const_args": [ "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_0_existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_1" ], "out_args": [ "/transformer_blocks.12/norm1_context/Add_2_output_0.out0_0_48_bfp.out1_16" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.12/norm1_context/Add_2_output_0.out0_0_48_bfp.out1_16", "/transformer_blocks.12/norm1/Add_2_output_0.out0_0_49_bfp.out1_14" ], "const_args": [ "onnx::MatMul_8209_onnx::MatMul_8206" ], "out_args": [ "/transformer_blocks.12/attn/Concat_output_0.out22_12_bfp.out23_36" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.12/norm1_context/Add_2_output_0.out0_0_48_bfp.out1_16", "/transformer_blocks.12/norm1/Add_2_output_0.out0_0_49_bfp.out1_14" ], "const_args": [ "onnx::MatMul_8210_onnx::MatMul_8207" ], "out_args": [ "/transformer_blocks.12/attn/Concat_1_output_0.out22_12_bfp.out23_37" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.12/norm1_context/Add_2_output_0.out0_0_48_bfp.out1_16", "/transformer_blocks.12/norm1/Add_2_output_0.out0_0_49_bfp.out1_14" ], "const_args": [ "onnx::MatMul_8211_onnx::MatMul_8208" ], "out_args": [ "/transformer_blocks.12/attn/Concat_2_output_0.out22_12_bfp.out23_38" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_12", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.12/attn/Concat_output_0.out22_12_bfp.out23_36", "/transformer_blocks.12/attn/Concat_1_output_0.out22_12_bfp.out23_37", "/transformer_blocks.12/attn/Concat_2_output_0.out22_12_bfp.out23_38" ], "const_args": [], "out_args": [ "/transformer_blocks.12/attn/Reshape_3_output_0.out22_12_bfp.out27_0_12" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.12/attn/Reshape_3_output_0.out22_12_bfp.out27_0_12" ], "const_args": [ "onnx::MatMul_8225" ], "out_args": [ "/transformer_blocks.12/attn/to_out.0/Add_output_0.out6_1_24_bfp.out7_24" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.12/attn/to_out.0/Add_output_0.out6_1_24_bfp.out7_24", "/Add_11_output_0.out_35_1_15" ], "const_args": [ "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_2_gma" ], "out_args": [ "/transformer_blocks.12/Add_output_0.out10_14" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.12/Add_output_0.out10_14" ], "const_args": [ "/transformer_blocks.12/norm2/Constant_output_0", "/transformer_blocks.12/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.12/norm2/LayerNormalization_output_0.out14_50_bfp.out15_50" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.12/norm2/LayerNormalization_output_0.out14_50_bfp.out15_50" ], "const_args": [ "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_3_existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_4" ], "out_args": [ "/transformer_blocks.12/Add_2_output_0.out0_0_50_bfp.out1_15" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.12/Add_2_output_0.out0_0_50_bfp.out1_15" ], "const_args": [ "onnx::MatMul_8227" ], "out_args": [ "/transformer_blocks.12/ff/net.0/Mul_5_output_0.out17_2_24_bfp.out25_53" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.12/ff/net.0/Mul_5_output_0.out17_2_24_bfp.out25_53" ], "const_args": [ "onnx::MatMul_8228" ], "out_args": [ "/transformer_blocks.12/ff/net.2/Add_output_0.out17_3_107_bfp.out25_55" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.12/ff/net.2/Add_output_0.out17_3_107_bfp.out25_55", "/transformer_blocks.12/Add_output_0.out10_14" ], "const_args": [ "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_5_gma" ], "out_args": [ "/transformer_blocks.12/Add_3_output_0.out10_15" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.12/attn/Reshape_3_output_0.out22_12_bfp.out27_0_12" ], "const_args": [ "onnx::MatMul_8226" ], "out_args": [ "/transformer_blocks.12/attn/to_add_out/Add_output_0.out6_1_25_bfp.out7_25" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.12/attn/to_add_out/Add_output_0.out6_1_25_bfp.out7_25", "/transformer_blocks.11/Add_7_output_0.out10_13" ], "const_args": [ "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_2_gma" ], "out_args": [ "/transformer_blocks.12/Add_4_output_0.out10_16" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.12/Add_4_output_0.out10_16" ], "const_args": [ "/transformer_blocks.12/norm2_context/Constant_output_0", "/transformer_blocks.12/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.12/norm2_context/LayerNormalization_output_0.out14_51_bfp.out15_51" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.12/norm2_context/LayerNormalization_output_0.out14_51_bfp.out15_51" ], "const_args": [ "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_3_existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_4" ], "out_args": [ "/transformer_blocks.12/Add_6_output_0.out0_0_51_bfp.out1_17" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.12/Add_6_output_0.out0_0_51_bfp.out1_17" ], "const_args": [ "onnx::MatMul_8229" ], "out_args": [ "/transformer_blocks.12/ff_context/net.0/Mul_5_output_0.out17_2_25_bfp.out25_54" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.12/ff_context/net.0/Mul_5_output_0.out17_2_25_bfp.out25_54" ], "const_args": [ "onnx::MatMul_8230" ], "out_args": [ "/transformer_blocks.12/ff_context/net.2/Add_output_0.out17_3_108_bfp.out25_56" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.12/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.12/ff_context/net.2/Add_output_0.out17_3_108_bfp.out25_56", "/transformer_blocks.12/Add_4_output_0.out10_16" ], "const_args": [ "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_5_gma" ], "out_args": [ "/transformer_blocks.12/Add_7_output_0.out10_17" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.12/Add_7_output_0.out10_17" ], "const_args": [ "/transformer_blocks.13/norm1_context/norm/Constant_output_0", "/transformer_blocks.13/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.13/norm1_context/norm/LayerNormalization_output_0.out14_52_bfp.out15_52" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_12", "type": "SDAdd", "in_args": [ "/transformer_blocks.12/Add_3_output_0.out10_15", "block_controlnet_hidden_states_6.out_35_1_16" ], "const_args": [], "out_args": [ "/Add_12_output_0.out_35_1_16" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_12_output_0.out_35_1_16" ], "const_args": [ "/transformer_blocks.13/norm1/norm/Constant_output_0", "/transformer_blocks.13/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.13/norm1/norm/LayerNormalization_output_0.out14_53_bfp.out15_53" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.13/norm1/norm/LayerNormalization_output_0.out14_53_bfp.out15_53" ], "const_args": [ "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_0_existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_1" ], "out_args": [ "/transformer_blocks.13/norm1/Add_2_output_0.out0_0_53_bfp.out1_18" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.13/norm1_context/norm/LayerNormalization_output_0.out14_52_bfp.out15_52" ], "const_args": [ "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_0_existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_1" ], "out_args": [ "/transformer_blocks.13/norm1_context/Add_2_output_0.out0_0_52_bfp.out1_20" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.13/norm1_context/Add_2_output_0.out0_0_52_bfp.out1_20", "/transformer_blocks.13/norm1/Add_2_output_0.out0_0_53_bfp.out1_18" ], "const_args": [ "onnx::MatMul_8234_onnx::MatMul_8231" ], "out_args": [ "/transformer_blocks.13/attn/Concat_output_0.out22_13_bfp.out23_39" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.13/norm1_context/Add_2_output_0.out0_0_52_bfp.out1_20", "/transformer_blocks.13/norm1/Add_2_output_0.out0_0_53_bfp.out1_18" ], "const_args": [ "onnx::MatMul_8235_onnx::MatMul_8232" ], "out_args": [ "/transformer_blocks.13/attn/Concat_1_output_0.out22_13_bfp.out23_40" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.13/norm1_context/Add_2_output_0.out0_0_52_bfp.out1_20", "/transformer_blocks.13/norm1/Add_2_output_0.out0_0_53_bfp.out1_18" ], "const_args": [ "onnx::MatMul_8236_onnx::MatMul_8233" ], "out_args": [ "/transformer_blocks.13/attn/Concat_2_output_0.out22_13_bfp.out23_41" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_13", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.13/attn/Concat_output_0.out22_13_bfp.out23_39", "/transformer_blocks.13/attn/Concat_1_output_0.out22_13_bfp.out23_40", "/transformer_blocks.13/attn/Concat_2_output_0.out22_13_bfp.out23_41" ], "const_args": [], "out_args": [ "/transformer_blocks.13/attn/Reshape_3_output_0.out22_13_bfp.out27_0_13" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.13/attn/Reshape_3_output_0.out22_13_bfp.out27_0_13" ], "const_args": [ "onnx::MatMul_8250" ], "out_args": [ "/transformer_blocks.13/attn/to_out.0/Add_output_0.out6_1_26_bfp.out7_26" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.13/attn/to_out.0/Add_output_0.out6_1_26_bfp.out7_26", "/Add_12_output_0.out_35_1_16" ], "const_args": [ "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_2_gma" ], "out_args": [ "/transformer_blocks.13/Add_output_0.out10_18" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.13/Add_output_0.out10_18" ], "const_args": [ "/transformer_blocks.13/norm2/Constant_output_0", "/transformer_blocks.13/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.13/norm2/LayerNormalization_output_0.out14_54_bfp.out15_54" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.13/norm2/LayerNormalization_output_0.out14_54_bfp.out15_54" ], "const_args": [ "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_3_existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_4" ], "out_args": [ "/transformer_blocks.13/Add_2_output_0.out0_0_54_bfp.out1_19" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.13/Add_2_output_0.out0_0_54_bfp.out1_19" ], "const_args": [ "onnx::MatMul_8252" ], "out_args": [ "/transformer_blocks.13/ff/net.0/Mul_5_output_0.out17_2_26_bfp.out25_57" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.13/ff/net.0/Mul_5_output_0.out17_2_26_bfp.out25_57" ], "const_args": [ "onnx::MatMul_8253" ], "out_args": [ "/transformer_blocks.13/ff/net.2/Add_output_0.out17_3_115_bfp.out25_59" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.13/ff/net.2/Add_output_0.out17_3_115_bfp.out25_59", "/transformer_blocks.13/Add_output_0.out10_18" ], "const_args": [ "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_5_gma" ], "out_args": [ "/transformer_blocks.13/Add_3_output_0.out10_19" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_13", "type": "SDAdd", "in_args": [ "/transformer_blocks.13/Add_3_output_0.out10_19", "block_controlnet_hidden_states_6.out_35_1_16" ], "const_args": [], "out_args": [ "/Add_13_output_0.out_35_1_17" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.13/attn/Reshape_3_output_0.out22_13_bfp.out27_0_13" ], "const_args": [ "onnx::MatMul_8251" ], "out_args": [ "/transformer_blocks.13/attn/to_add_out/Add_output_0.out6_1_27_bfp.out7_27" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.13/attn/to_add_out/Add_output_0.out6_1_27_bfp.out7_27", "/transformer_blocks.12/Add_7_output_0.out10_17" ], "const_args": [ "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_2_gma" ], "out_args": [ "/transformer_blocks.13/Add_4_output_0.out10_20" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.13/Add_4_output_0.out10_20" ], "const_args": [ "/transformer_blocks.13/norm2_context/Constant_output_0", "/transformer_blocks.13/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.13/norm2_context/LayerNormalization_output_0.out14_55_bfp.out15_55" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.13/norm2_context/LayerNormalization_output_0.out14_55_bfp.out15_55" ], "const_args": [ "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_3_existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_4" ], "out_args": [ "/transformer_blocks.13/Add_6_output_0.out0_0_55_bfp.out1_21" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.13/Add_6_output_0.out0_0_55_bfp.out1_21" ], "const_args": [ "onnx::MatMul_8254" ], "out_args": [ "/transformer_blocks.13/ff_context/net.0/Mul_5_output_0.out17_2_27_bfp.out25_58" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.13/ff_context/net.0/Mul_5_output_0.out17_2_27_bfp.out25_58" ], "const_args": [ "onnx::MatMul_8255" ], "out_args": [ "/transformer_blocks.13/ff_context/net.2/Add_output_0.out17_3_116_bfp.out25_60" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.13/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.13/ff_context/net.2/Add_output_0.out17_3_116_bfp.out25_60", "/transformer_blocks.13/Add_4_output_0.out10_20" ], "const_args": [ "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_5_gma" ], "out_args": [ "/transformer_blocks.13/Add_7_output_0.out10_21" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_13_output_0.out_35_1_17" ], "const_args": [ "/transformer_blocks.14/norm1/norm/Constant_output_0", "/transformer_blocks.14/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.14/norm1/norm/LayerNormalization_output_0.out14_57_bfp.out15_57" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.14/norm1/norm/LayerNormalization_output_0.out14_57_bfp.out15_57" ], "const_args": [ "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_0_existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_1" ], "out_args": [ "/transformer_blocks.14/norm1/Add_2_output_0.out0_0_57_bfp.out1_22" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.13/Add_7_output_0.out10_21" ], "const_args": [ "/transformer_blocks.14/norm1_context/norm/Constant_output_0", "/transformer_blocks.14/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.14/norm1_context/norm/LayerNormalization_output_0.out14_56_bfp.out15_56" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.14/norm1_context/norm/LayerNormalization_output_0.out14_56_bfp.out15_56" ], "const_args": [ "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_0_existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_1" ], "out_args": [ "/transformer_blocks.14/norm1_context/Add_2_output_0.out0_0_56_bfp.out1_24" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.14/norm1_context/Add_2_output_0.out0_0_56_bfp.out1_24", "/transformer_blocks.14/norm1/Add_2_output_0.out0_0_57_bfp.out1_22" ], "const_args": [ "onnx::MatMul_8259_onnx::MatMul_8256" ], "out_args": [ "/transformer_blocks.14/attn/Concat_output_0.out22_14_bfp.out23_42" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.14/norm1_context/Add_2_output_0.out0_0_56_bfp.out1_24", "/transformer_blocks.14/norm1/Add_2_output_0.out0_0_57_bfp.out1_22" ], "const_args": [ "onnx::MatMul_8260_onnx::MatMul_8257" ], "out_args": [ "/transformer_blocks.14/attn/Concat_1_output_0.out22_14_bfp.out23_43" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.14/norm1_context/Add_2_output_0.out0_0_56_bfp.out1_24", "/transformer_blocks.14/norm1/Add_2_output_0.out0_0_57_bfp.out1_22" ], "const_args": [ "onnx::MatMul_8261_onnx::MatMul_8258" ], "out_args": [ "/transformer_blocks.14/attn/Concat_2_output_0.out22_14_bfp.out23_44" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_14", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.14/attn/Concat_output_0.out22_14_bfp.out23_42", "/transformer_blocks.14/attn/Concat_1_output_0.out22_14_bfp.out23_43", "/transformer_blocks.14/attn/Concat_2_output_0.out22_14_bfp.out23_44" ], "const_args": [], "out_args": [ "/transformer_blocks.14/attn/Reshape_3_output_0.out22_14_bfp.out27_0_14" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.14/attn/Reshape_3_output_0.out22_14_bfp.out27_0_14" ], "const_args": [ "onnx::MatMul_8275" ], "out_args": [ "/transformer_blocks.14/attn/to_out.0/Add_output_0.out6_1_28_bfp.out7_28" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.14/attn/to_out.0/Add_output_0.out6_1_28_bfp.out7_28", "/Add_13_output_0.out_35_1_17" ], "const_args": [ "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_2_gma" ], "out_args": [ "/transformer_blocks.14/Add_output_0.out10_22" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.14/Add_output_0.out10_22" ], "const_args": [ "/transformer_blocks.14/norm2/Constant_output_0", "/transformer_blocks.14/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.14/norm2/LayerNormalization_output_0.out14_58_bfp.out15_58" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.14/norm2/LayerNormalization_output_0.out14_58_bfp.out15_58" ], "const_args": [ "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_3_existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_4" ], "out_args": [ "/transformer_blocks.14/Add_2_output_0.out0_0_58_bfp.out1_23" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.14/Add_2_output_0.out0_0_58_bfp.out1_23" ], "const_args": [ "onnx::MatMul_8277" ], "out_args": [ "/transformer_blocks.14/ff/net.0/Mul_5_output_0.out17_2_28_bfp.out25_61" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.14/ff/net.0/Mul_5_output_0.out17_2_28_bfp.out25_61" ], "const_args": [ "onnx::MatMul_8278" ], "out_args": [ "/transformer_blocks.14/ff/net.2/Add_output_0.out17_3_123_bfp.out25_63" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.14/ff/net.2/Add_output_0.out17_3_123_bfp.out25_63", "/transformer_blocks.14/Add_output_0.out10_22" ], "const_args": [ "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_5_gma" ], "out_args": [ "/transformer_blocks.14/Add_3_output_0.out10_23" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.14/attn/Reshape_3_output_0.out22_14_bfp.out27_0_14" ], "const_args": [ "onnx::MatMul_8276" ], "out_args": [ "/transformer_blocks.14/attn/to_add_out/Add_output_0.out6_1_29_bfp.out7_29" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.14/attn/to_add_out/Add_output_0.out6_1_29_bfp.out7_29", "/transformer_blocks.13/Add_7_output_0.out10_21" ], "const_args": [ "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_2_gma" ], "out_args": [ "/transformer_blocks.14/Add_4_output_0.out10_24" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.14/Add_4_output_0.out10_24" ], "const_args": [ "/transformer_blocks.14/norm2_context/Constant_output_0", "/transformer_blocks.14/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.14/norm2_context/LayerNormalization_output_0.out14_59_bfp.out15_59" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.14/norm2_context/LayerNormalization_output_0.out14_59_bfp.out15_59" ], "const_args": [ "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_3_existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_4" ], "out_args": [ "/transformer_blocks.14/Add_6_output_0.out0_0_59_bfp.out1_25" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.14/Add_6_output_0.out0_0_59_bfp.out1_25" ], "const_args": [ "onnx::MatMul_8279" ], "out_args": [ "/transformer_blocks.14/ff_context/net.0/Mul_5_output_0.out17_2_29_bfp.out25_62" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.14/ff_context/net.0/Mul_5_output_0.out17_2_29_bfp.out25_62" ], "const_args": [ "onnx::MatMul_8280" ], "out_args": [ "/transformer_blocks.14/ff_context/net.2/Add_output_0.out17_3_124_bfp.out25_64" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.14/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.14/ff_context/net.2/Add_output_0.out17_3_124_bfp.out25_64", "/transformer_blocks.14/Add_4_output_0.out10_24" ], "const_args": [ "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_5_gma" ], "out_args": [ "/transformer_blocks.14/Add_7_output_0.out10_25" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.14/Add_7_output_0.out10_25" ], "const_args": [ "/transformer_blocks.15/norm1_context/norm/Constant_output_0", "/transformer_blocks.15/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.15/norm1_context/norm/LayerNormalization_output_0.out14_60_bfp.out15_60" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_14", "type": "SDAdd", "in_args": [ "/transformer_blocks.14/Add_3_output_0.out10_23", "block_controlnet_hidden_states_7.out_35_1_18" ], "const_args": [], "out_args": [ "/Add_14_output_0.out_35_1_18" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_14_output_0.out_35_1_18" ], "const_args": [ "/transformer_blocks.15/norm1/norm/Constant_output_0", "/transformer_blocks.15/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.15/norm1/norm/LayerNormalization_output_0.out14_61_bfp.out15_61" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.15/norm1/norm/LayerNormalization_output_0.out14_61_bfp.out15_61" ], "const_args": [ "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_0_existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_1" ], "out_args": [ "/transformer_blocks.15/norm1/Add_2_output_0.out0_0_61_bfp.out1_26" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.15/norm1_context/norm/LayerNormalization_output_0.out14_60_bfp.out15_60" ], "const_args": [ "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_0_existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_1" ], "out_args": [ "/transformer_blocks.15/norm1_context/Add_2_output_0.out0_0_60_bfp.out1_28" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.15/norm1_context/Add_2_output_0.out0_0_60_bfp.out1_28", "/transformer_blocks.15/norm1/Add_2_output_0.out0_0_61_bfp.out1_26" ], "const_args": [ "onnx::MatMul_8284_onnx::MatMul_8281" ], "out_args": [ "/transformer_blocks.15/attn/Concat_output_0.out22_15_bfp.out23_45" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.15/norm1_context/Add_2_output_0.out0_0_60_bfp.out1_28", "/transformer_blocks.15/norm1/Add_2_output_0.out0_0_61_bfp.out1_26" ], "const_args": [ "onnx::MatMul_8285_onnx::MatMul_8282" ], "out_args": [ "/transformer_blocks.15/attn/Concat_1_output_0.out22_15_bfp.out23_46" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.15/norm1_context/Add_2_output_0.out0_0_60_bfp.out1_28", "/transformer_blocks.15/norm1/Add_2_output_0.out0_0_61_bfp.out1_26" ], "const_args": [ "onnx::MatMul_8286_onnx::MatMul_8283" ], "out_args": [ "/transformer_blocks.15/attn/Concat_2_output_0.out22_15_bfp.out23_47" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_15", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.15/attn/Concat_output_0.out22_15_bfp.out23_45", "/transformer_blocks.15/attn/Concat_1_output_0.out22_15_bfp.out23_46", "/transformer_blocks.15/attn/Concat_2_output_0.out22_15_bfp.out23_47" ], "const_args": [], "out_args": [ "/transformer_blocks.15/attn/Reshape_3_output_0.out22_15_bfp.out27_0_15" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.15/attn/Reshape_3_output_0.out22_15_bfp.out27_0_15" ], "const_args": [ "onnx::MatMul_8300" ], "out_args": [ "/transformer_blocks.15/attn/to_out.0/Add_output_0.out6_1_30_bfp.out7_30" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.15/attn/to_out.0/Add_output_0.out6_1_30_bfp.out7_30", "/Add_14_output_0.out_35_1_18" ], "const_args": [ "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_2_gma" ], "out_args": [ "/transformer_blocks.15/Add_output_0.out10_26" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.15/Add_output_0.out10_26" ], "const_args": [ "/transformer_blocks.15/norm2/Constant_output_0", "/transformer_blocks.15/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.15/norm2/LayerNormalization_output_0.out14_62_bfp.out15_62" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.15/norm2/LayerNormalization_output_0.out14_62_bfp.out15_62" ], "const_args": [ "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_3_existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_4" ], "out_args": [ "/transformer_blocks.15/Add_2_output_0.out0_0_62_bfp.out1_27" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.15/Add_2_output_0.out0_0_62_bfp.out1_27" ], "const_args": [ "onnx::MatMul_8302" ], "out_args": [ "/transformer_blocks.15/ff/net.0/Mul_5_output_0.out17_2_30_bfp.out25_65" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.15/ff/net.0/Mul_5_output_0.out17_2_30_bfp.out25_65" ], "const_args": [ "onnx::MatMul_8303" ], "out_args": [ "/transformer_blocks.15/ff/net.2/Add_output_0.out17_3_131_bfp.out25_67" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.15/ff/net.2/Add_output_0.out17_3_131_bfp.out25_67", "/transformer_blocks.15/Add_output_0.out10_26" ], "const_args": [ "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_5_gma" ], "out_args": [ "/transformer_blocks.15/Add_3_output_0.out10_27" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_15", "type": "SDAdd", "in_args": [ "/transformer_blocks.15/Add_3_output_0.out10_27", "block_controlnet_hidden_states_7.out_35_1_18" ], "const_args": [], "out_args": [ "/Add_15_output_0.out_35_1_19" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.15/attn/Reshape_3_output_0.out22_15_bfp.out27_0_15" ], "const_args": [ "onnx::MatMul_8301" ], "out_args": [ "/transformer_blocks.15/attn/to_add_out/Add_output_0.out6_1_31_bfp.out7_31" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.15/attn/to_add_out/Add_output_0.out6_1_31_bfp.out7_31", "/transformer_blocks.14/Add_7_output_0.out10_25" ], "const_args": [ "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_2_gma" ], "out_args": [ "/transformer_blocks.15/Add_4_output_0.out10_28" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.15/Add_4_output_0.out10_28" ], "const_args": [ "/transformer_blocks.15/norm2_context/Constant_output_0", "/transformer_blocks.15/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.15/norm2_context/LayerNormalization_output_0.out14_63_bfp.out15_63" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.15/norm2_context/LayerNormalization_output_0.out14_63_bfp.out15_63" ], "const_args": [ "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_3_existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_4" ], "out_args": [ "/transformer_blocks.15/Add_6_output_0.out0_0_63_bfp.out1_29" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.15/Add_6_output_0.out0_0_63_bfp.out1_29" ], "const_args": [ "onnx::MatMul_8304" ], "out_args": [ "/transformer_blocks.15/ff_context/net.0/Mul_5_output_0.out17_2_31_bfp.out25_66" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.15/ff_context/net.0/Mul_5_output_0.out17_2_31_bfp.out25_66" ], "const_args": [ "onnx::MatMul_8305" ], "out_args": [ "/transformer_blocks.15/ff_context/net.2/Add_output_0.out17_3_132_bfp.out25_68" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.15/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.15/ff_context/net.2/Add_output_0.out17_3_132_bfp.out25_68", "/transformer_blocks.15/Add_4_output_0.out10_28" ], "const_args": [ "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_5_gma" ], "out_args": [ "/transformer_blocks.15/Add_7_output_0.out10_29" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_15_output_0.out_35_1_19" ], "const_args": [ "/transformer_blocks.16/norm1/norm/Constant_output_0", "/transformer_blocks.16/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.16/norm1/norm/LayerNormalization_output_0.out14_65_bfp.out15_65" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.16/norm1/norm/LayerNormalization_output_0.out14_65_bfp.out15_65" ], "const_args": [ "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_0_existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_1" ], "out_args": [ "/transformer_blocks.16/norm1/Add_2_output_0.out0_0_65_bfp.out1_30" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.15/Add_7_output_0.out10_29" ], "const_args": [ "/transformer_blocks.16/norm1_context/norm/Constant_output_0", "/transformer_blocks.16/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.16/norm1_context/norm/LayerNormalization_output_0.out14_64_bfp.out15_64" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.16/norm1_context/norm/LayerNormalization_output_0.out14_64_bfp.out15_64" ], "const_args": [ "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_0_existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_1" ], "out_args": [ "/transformer_blocks.16/norm1_context/Add_2_output_0.out0_0_64_bfp.out1_32" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.16/norm1_context/Add_2_output_0.out0_0_64_bfp.out1_32", "/transformer_blocks.16/norm1/Add_2_output_0.out0_0_65_bfp.out1_30" ], "const_args": [ "onnx::MatMul_8309_onnx::MatMul_8306" ], "out_args": [ "/transformer_blocks.16/attn/Concat_output_0.out22_16_bfp.out23_48" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.16/norm1_context/Add_2_output_0.out0_0_64_bfp.out1_32", "/transformer_blocks.16/norm1/Add_2_output_0.out0_0_65_bfp.out1_30" ], "const_args": [ "onnx::MatMul_8310_onnx::MatMul_8307" ], "out_args": [ "/transformer_blocks.16/attn/Concat_1_output_0.out22_16_bfp.out23_49" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.16/norm1_context/Add_2_output_0.out0_0_64_bfp.out1_32", "/transformer_blocks.16/norm1/Add_2_output_0.out0_0_65_bfp.out1_30" ], "const_args": [ "onnx::MatMul_8311_onnx::MatMul_8308" ], "out_args": [ "/transformer_blocks.16/attn/Concat_2_output_0.out22_16_bfp.out23_50" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_16", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.16/attn/Concat_output_0.out22_16_bfp.out23_48", "/transformer_blocks.16/attn/Concat_1_output_0.out22_16_bfp.out23_49", "/transformer_blocks.16/attn/Concat_2_output_0.out22_16_bfp.out23_50" ], "const_args": [], "out_args": [ "/transformer_blocks.16/attn/Reshape_3_output_0.out22_16_bfp.out27_0_16" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.16/attn/Reshape_3_output_0.out22_16_bfp.out27_0_16" ], "const_args": [ "onnx::MatMul_8325" ], "out_args": [ "/transformer_blocks.16/attn/to_out.0/Add_output_0.out6_1_32_bfp.out7_32" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.16/attn/to_out.0/Add_output_0.out6_1_32_bfp.out7_32", "/Add_15_output_0.out_35_1_19" ], "const_args": [ "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_2_gma" ], "out_args": [ "/transformer_blocks.16/Add_output_0.out10_30" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.16/Add_output_0.out10_30" ], "const_args": [ "/transformer_blocks.16/norm2/Constant_output_0", "/transformer_blocks.16/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.16/norm2/LayerNormalization_output_0.out14_66_bfp.out15_66" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.16/norm2/LayerNormalization_output_0.out14_66_bfp.out15_66" ], "const_args": [ "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_3_existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_4" ], "out_args": [ "/transformer_blocks.16/Add_2_output_0.out0_0_66_bfp.out1_31" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.16/Add_2_output_0.out0_0_66_bfp.out1_31" ], "const_args": [ "onnx::MatMul_8327" ], "out_args": [ "/transformer_blocks.16/ff/net.0/Mul_5_output_0.out17_2_32_bfp.out25_69" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.16/ff/net.0/Mul_5_output_0.out17_2_32_bfp.out25_69" ], "const_args": [ "onnx::MatMul_8328" ], "out_args": [ "/transformer_blocks.16/ff/net.2/Add_output_0.out17_3_139_bfp.out25_71" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.16/ff/net.2/Add_output_0.out17_3_139_bfp.out25_71", "/transformer_blocks.16/Add_output_0.out10_30" ], "const_args": [ "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_5_gma" ], "out_args": [ "/transformer_blocks.16/Add_3_output_0.out10_31" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.16/attn/Reshape_3_output_0.out22_16_bfp.out27_0_16" ], "const_args": [ "onnx::MatMul_8326" ], "out_args": [ "/transformer_blocks.16/attn/to_add_out/Add_output_0.out6_1_33_bfp.out7_33" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.16/attn/to_add_out/Add_output_0.out6_1_33_bfp.out7_33", "/transformer_blocks.15/Add_7_output_0.out10_29" ], "const_args": [ "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_2_gma" ], "out_args": [ "/transformer_blocks.16/Add_4_output_0.out10_32" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.16/Add_4_output_0.out10_32" ], "const_args": [ "/transformer_blocks.16/norm2_context/Constant_output_0", "/transformer_blocks.16/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.16/norm2_context/LayerNormalization_output_0.out14_67_bfp.out15_67" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.16/norm2_context/LayerNormalization_output_0.out14_67_bfp.out15_67" ], "const_args": [ "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_3_existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_4" ], "out_args": [ "/transformer_blocks.16/Add_6_output_0.out0_0_67_bfp.out1_33" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.16/Add_6_output_0.out0_0_67_bfp.out1_33" ], "const_args": [ "onnx::MatMul_8329" ], "out_args": [ "/transformer_blocks.16/ff_context/net.0/Mul_5_output_0.out17_2_33_bfp.out25_70" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.16/ff_context/net.0/Mul_5_output_0.out17_2_33_bfp.out25_70" ], "const_args": [ "onnx::MatMul_8330" ], "out_args": [ "/transformer_blocks.16/ff_context/net.2/Add_output_0.out17_3_140_bfp.out25_72" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.16/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.16/ff_context/net.2/Add_output_0.out17_3_140_bfp.out25_72", "/transformer_blocks.16/Add_4_output_0.out10_32" ], "const_args": [ "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_5_gma" ], "out_args": [ "/transformer_blocks.16/Add_7_output_0.out10_33" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.16/Add_7_output_0.out10_33" ], "const_args": [ "/transformer_blocks.17/norm1_context/norm/Constant_output_0", "/transformer_blocks.17/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.17/norm1_context/norm/LayerNormalization_output_0.out14_68_bfp.out15_68" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_16", "type": "SDAdd", "in_args": [ "/transformer_blocks.16/Add_3_output_0.out10_31", "block_controlnet_hidden_states_8.out_35_1_20" ], "const_args": [], "out_args": [ "/Add_16_output_0.out_35_1_20" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_16_output_0.out_35_1_20" ], "const_args": [ "/transformer_blocks.17/norm1/norm/Constant_output_0", "/transformer_blocks.17/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.17/norm1/norm/LayerNormalization_output_0.out14_69_bfp.out15_69" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.17/norm1/norm/LayerNormalization_output_0.out14_69_bfp.out15_69" ], "const_args": [ "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_0_existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_1" ], "out_args": [ "/transformer_blocks.17/norm1/Add_2_output_0.out0_0_69_bfp.out1_34" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.17/norm1_context/norm/LayerNormalization_output_0.out14_68_bfp.out15_68" ], "const_args": [ "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_0_existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_1" ], "out_args": [ "/transformer_blocks.17/norm1_context/Add_2_output_0.out0_0_68_bfp.out1_36" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.17/norm1_context/Add_2_output_0.out0_0_68_bfp.out1_36", "/transformer_blocks.17/norm1/Add_2_output_0.out0_0_69_bfp.out1_34" ], "const_args": [ "onnx::MatMul_8334_onnx::MatMul_8331" ], "out_args": [ "/transformer_blocks.17/attn/Concat_output_0.out22_17_bfp.out23_51" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.17/norm1_context/Add_2_output_0.out0_0_68_bfp.out1_36", "/transformer_blocks.17/norm1/Add_2_output_0.out0_0_69_bfp.out1_34" ], "const_args": [ "onnx::MatMul_8335_onnx::MatMul_8332" ], "out_args": [ "/transformer_blocks.17/attn/Concat_1_output_0.out22_17_bfp.out23_52" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.17/norm1_context/Add_2_output_0.out0_0_68_bfp.out1_36", "/transformer_blocks.17/norm1/Add_2_output_0.out0_0_69_bfp.out1_34" ], "const_args": [ "onnx::MatMul_8336_onnx::MatMul_8333" ], "out_args": [ "/transformer_blocks.17/attn/Concat_2_output_0.out22_17_bfp.out23_53" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_17", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.17/attn/Concat_output_0.out22_17_bfp.out23_51", "/transformer_blocks.17/attn/Concat_1_output_0.out22_17_bfp.out23_52", "/transformer_blocks.17/attn/Concat_2_output_0.out22_17_bfp.out23_53" ], "const_args": [], "out_args": [ "/transformer_blocks.17/attn/Reshape_3_output_0.out22_17_bfp.out27_0_17" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.17/attn/Reshape_3_output_0.out22_17_bfp.out27_0_17" ], "const_args": [ "onnx::MatMul_8350" ], "out_args": [ "/transformer_blocks.17/attn/to_out.0/Add_output_0.out6_1_34_bfp.out7_34" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.17/attn/to_out.0/Add_output_0.out6_1_34_bfp.out7_34", "/Add_16_output_0.out_35_1_20" ], "const_args": [ "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_2_gma" ], "out_args": [ "/transformer_blocks.17/Add_output_0.out10_34" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.17/Add_output_0.out10_34" ], "const_args": [ "/transformer_blocks.17/norm2/Constant_output_0", "/transformer_blocks.17/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.17/norm2/LayerNormalization_output_0.out14_70_bfp.out15_70" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.17/norm2/LayerNormalization_output_0.out14_70_bfp.out15_70" ], "const_args": [ "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_3_existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_4" ], "out_args": [ "/transformer_blocks.17/Add_2_output_0.out0_0_70_bfp.out1_35" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.17/Add_2_output_0.out0_0_70_bfp.out1_35" ], "const_args": [ "onnx::MatMul_8352" ], "out_args": [ "/transformer_blocks.17/ff/net.0/Mul_5_output_0.out17_2_34_bfp.out25_73" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.17/ff/net.0/Mul_5_output_0.out17_2_34_bfp.out25_73" ], "const_args": [ "onnx::MatMul_8353" ], "out_args": [ "/transformer_blocks.17/ff/net.2/Add_output_0.out17_3_147_bfp.out25_75" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.17/ff/net.2/Add_output_0.out17_3_147_bfp.out25_75", "/transformer_blocks.17/Add_output_0.out10_34" ], "const_args": [ "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_5_gma" ], "out_args": [ "/transformer_blocks.17/Add_3_output_0.out10_35" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_17", "type": "SDAdd", "in_args": [ "/transformer_blocks.17/Add_3_output_0.out10_35", "block_controlnet_hidden_states_8.out_35_1_20" ], "const_args": [], "out_args": [ "/Add_17_output_0.out_35_1_21" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.17/attn/Reshape_3_output_0.out22_17_bfp.out27_0_17" ], "const_args": [ "onnx::MatMul_8351" ], "out_args": [ "/transformer_blocks.17/attn/to_add_out/Add_output_0.out6_1_35_bfp.out7_35" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.17/attn/to_add_out/Add_output_0.out6_1_35_bfp.out7_35", "/transformer_blocks.16/Add_7_output_0.out10_33" ], "const_args": [ "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_2_gma" ], "out_args": [ "/transformer_blocks.17/Add_4_output_0.out10_36" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.17/Add_4_output_0.out10_36" ], "const_args": [ "/transformer_blocks.17/norm2_context/Constant_output_0", "/transformer_blocks.17/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.17/norm2_context/LayerNormalization_output_0.out14_71_bfp.out15_71" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.17/norm2_context/LayerNormalization_output_0.out14_71_bfp.out15_71" ], "const_args": [ "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_3_existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_4" ], "out_args": [ "/transformer_blocks.17/Add_6_output_0.out0_0_71_bfp.out1_37" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.17/Add_6_output_0.out0_0_71_bfp.out1_37" ], "const_args": [ "onnx::MatMul_8354" ], "out_args": [ "/transformer_blocks.17/ff_context/net.0/Mul_5_output_0.out17_2_35_bfp.out25_74" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.17/ff_context/net.0/Mul_5_output_0.out17_2_35_bfp.out25_74" ], "const_args": [ "onnx::MatMul_8355" ], "out_args": [ "/transformer_blocks.17/ff_context/net.2/Add_output_0.out17_3_148_bfp.out25_76" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.17/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.17/ff_context/net.2/Add_output_0.out17_3_148_bfp.out25_76", "/transformer_blocks.17/Add_4_output_0.out10_36" ], "const_args": [ "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_5_gma" ], "out_args": [ "/transformer_blocks.17/Add_7_output_0.out10_37" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_17_output_0.out_35_1_21" ], "const_args": [ "/transformer_blocks.18/norm1/norm/Constant_output_0", "/transformer_blocks.18/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.18/norm1/norm/LayerNormalization_output_0.out14_73_bfp.out15_73" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.18/norm1/norm/LayerNormalization_output_0.out14_73_bfp.out15_73" ], "const_args": [ "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_0_existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_1" ], "out_args": [ "/transformer_blocks.18/norm1/Add_2_output_0.out0_0_73_bfp.out1_38" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.17/Add_7_output_0.out10_37" ], "const_args": [ "/transformer_blocks.18/norm1_context/norm/Constant_output_0", "/transformer_blocks.18/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.18/norm1_context/norm/LayerNormalization_output_0.out14_72_bfp.out15_72" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.18/norm1_context/norm/LayerNormalization_output_0.out14_72_bfp.out15_72" ], "const_args": [ "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_0_existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_1" ], "out_args": [ "/transformer_blocks.18/norm1_context/Add_2_output_0.out0_0_72_bfp.out1_40" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.18/norm1_context/Add_2_output_0.out0_0_72_bfp.out1_40", "/transformer_blocks.18/norm1/Add_2_output_0.out0_0_73_bfp.out1_38" ], "const_args": [ "onnx::MatMul_8359_onnx::MatMul_8356" ], "out_args": [ "/transformer_blocks.18/attn/Concat_output_0.out22_18_bfp.out23_54" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.18/norm1_context/Add_2_output_0.out0_0_72_bfp.out1_40", "/transformer_blocks.18/norm1/Add_2_output_0.out0_0_73_bfp.out1_38" ], "const_args": [ "onnx::MatMul_8360_onnx::MatMul_8357" ], "out_args": [ "/transformer_blocks.18/attn/Concat_1_output_0.out22_18_bfp.out23_55" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.18/norm1_context/Add_2_output_0.out0_0_72_bfp.out1_40", "/transformer_blocks.18/norm1/Add_2_output_0.out0_0_73_bfp.out1_38" ], "const_args": [ "onnx::MatMul_8361_onnx::MatMul_8358" ], "out_args": [ "/transformer_blocks.18/attn/Concat_2_output_0.out22_18_bfp.out23_56" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_18", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.18/attn/Concat_output_0.out22_18_bfp.out23_54", "/transformer_blocks.18/attn/Concat_1_output_0.out22_18_bfp.out23_55", "/transformer_blocks.18/attn/Concat_2_output_0.out22_18_bfp.out23_56" ], "const_args": [], "out_args": [ "/transformer_blocks.18/attn/Reshape_3_output_0.out22_18_bfp.out27_0_18" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.18/attn/Reshape_3_output_0.out22_18_bfp.out27_0_18" ], "const_args": [ "onnx::MatMul_8375" ], "out_args": [ "/transformer_blocks.18/attn/to_out.0/Add_output_0.out6_1_36_bfp.out7_36" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.18/attn/to_out.0/Add_output_0.out6_1_36_bfp.out7_36", "/Add_17_output_0.out_35_1_21" ], "const_args": [ "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_2_gma" ], "out_args": [ "/transformer_blocks.18/Add_output_0.out10_38" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.18/Add_output_0.out10_38" ], "const_args": [ "/transformer_blocks.18/norm2/Constant_output_0", "/transformer_blocks.18/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.18/norm2/LayerNormalization_output_0.out14_74_bfp.out15_74" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.18/norm2/LayerNormalization_output_0.out14_74_bfp.out15_74" ], "const_args": [ "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_3_existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_4" ], "out_args": [ "/transformer_blocks.18/Add_2_output_0.out0_0_74_bfp.out1_39" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.18/Add_2_output_0.out0_0_74_bfp.out1_39" ], "const_args": [ "onnx::MatMul_8377" ], "out_args": [ "/transformer_blocks.18/ff/net.0/Mul_5_output_0.out17_2_36_bfp.out25_77" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.18/ff/net.0/Mul_5_output_0.out17_2_36_bfp.out25_77" ], "const_args": [ "onnx::MatMul_8378" ], "out_args": [ "/transformer_blocks.18/ff/net.2/Add_output_0.out17_3_155_bfp.out25_79" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.18/ff/net.2/Add_output_0.out17_3_155_bfp.out25_79", "/transformer_blocks.18/Add_output_0.out10_38" ], "const_args": [ "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_5_gma" ], "out_args": [ "/transformer_blocks.18/Add_3_output_0.out10_39" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.18/attn/Reshape_3_output_0.out22_18_bfp.out27_0_18" ], "const_args": [ "onnx::MatMul_8376" ], "out_args": [ "/transformer_blocks.18/attn/to_add_out/Add_output_0.out6_1_37_bfp.out7_37" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.18/attn/to_add_out/Add_output_0.out6_1_37_bfp.out7_37", "/transformer_blocks.17/Add_7_output_0.out10_37" ], "const_args": [ "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_2_gma" ], "out_args": [ "/transformer_blocks.18/Add_4_output_0.out10_40" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.18/Add_4_output_0.out10_40" ], "const_args": [ "/transformer_blocks.18/norm2_context/Constant_output_0", "/transformer_blocks.18/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.18/norm2_context/LayerNormalization_output_0.out14_75_bfp.out15_75" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.18/norm2_context/LayerNormalization_output_0.out14_75_bfp.out15_75" ], "const_args": [ "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_3_existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_4" ], "out_args": [ "/transformer_blocks.18/Add_6_output_0.out0_0_75_bfp.out1_41" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.18/Add_6_output_0.out0_0_75_bfp.out1_41" ], "const_args": [ "onnx::MatMul_8379" ], "out_args": [ "/transformer_blocks.18/ff_context/net.0/Mul_5_output_0.out17_2_37_bfp.out25_78" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.18/ff_context/net.0/Mul_5_output_0.out17_2_37_bfp.out25_78" ], "const_args": [ "onnx::MatMul_8380" ], "out_args": [ "/transformer_blocks.18/ff_context/net.2/Add_output_0.out17_3_156_bfp.out25_80" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.18/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.18/ff_context/net.2/Add_output_0.out17_3_156_bfp.out25_80", "/transformer_blocks.18/Add_4_output_0.out10_40" ], "const_args": [ "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_5_gma" ], "out_args": [ "/transformer_blocks.18/Add_7_output_0.out10_41" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.18/Add_7_output_0.out10_41" ], "const_args": [ "/transformer_blocks.19/norm1_context/norm/Constant_output_0", "/transformer_blocks.19/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.19/norm1_context/norm/LayerNormalization_output_0.out14_76_bfp.out15_76" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_18", "type": "SDAdd", "in_args": [ "/transformer_blocks.18/Add_3_output_0.out10_39", "block_controlnet_hidden_states_9.out_35_1_22" ], "const_args": [], "out_args": [ "/Add_18_output_0.out_35_1_22" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_18_output_0.out_35_1_22" ], "const_args": [ "/transformer_blocks.19/norm1/norm/Constant_output_0", "/transformer_blocks.19/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.19/norm1/norm/LayerNormalization_output_0.out14_77_bfp.out15_77" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.19/norm1/norm/LayerNormalization_output_0.out14_77_bfp.out15_77" ], "const_args": [ "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_0_existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_1" ], "out_args": [ "/transformer_blocks.19/norm1/Add_2_output_0.out0_0_77_bfp.out1_42" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.19/norm1_context/norm/LayerNormalization_output_0.out14_76_bfp.out15_76" ], "const_args": [ "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_0_existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_1" ], "out_args": [ "/transformer_blocks.19/norm1_context/Add_2_output_0.out0_0_76_bfp.out1_44" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.19/norm1_context/Add_2_output_0.out0_0_76_bfp.out1_44", "/transformer_blocks.19/norm1/Add_2_output_0.out0_0_77_bfp.out1_42" ], "const_args": [ "onnx::MatMul_8384_onnx::MatMul_8381" ], "out_args": [ "/transformer_blocks.19/attn/Concat_output_0.out22_19_bfp.out23_57" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.19/norm1_context/Add_2_output_0.out0_0_76_bfp.out1_44", "/transformer_blocks.19/norm1/Add_2_output_0.out0_0_77_bfp.out1_42" ], "const_args": [ "onnx::MatMul_8385_onnx::MatMul_8382" ], "out_args": [ "/transformer_blocks.19/attn/Concat_1_output_0.out22_19_bfp.out23_58" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.19/norm1_context/Add_2_output_0.out0_0_76_bfp.out1_44", "/transformer_blocks.19/norm1/Add_2_output_0.out0_0_77_bfp.out1_42" ], "const_args": [ "onnx::MatMul_8386_onnx::MatMul_8383" ], "out_args": [ "/transformer_blocks.19/attn/Concat_2_output_0.out22_19_bfp.out23_59" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_19", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.19/attn/Concat_output_0.out22_19_bfp.out23_57", "/transformer_blocks.19/attn/Concat_1_output_0.out22_19_bfp.out23_58", "/transformer_blocks.19/attn/Concat_2_output_0.out22_19_bfp.out23_59" ], "const_args": [], "out_args": [ "/transformer_blocks.19/attn/Reshape_3_output_0.out22_19_bfp.out27_0_19" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.19/attn/Reshape_3_output_0.out22_19_bfp.out27_0_19" ], "const_args": [ "onnx::MatMul_8400" ], "out_args": [ "/transformer_blocks.19/attn/to_out.0/Add_output_0.out6_1_38_bfp.out7_38" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.19/attn/to_out.0/Add_output_0.out6_1_38_bfp.out7_38", "/Add_18_output_0.out_35_1_22" ], "const_args": [ "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_2_gma" ], "out_args": [ "/transformer_blocks.19/Add_output_0.out10_42" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.19/Add_output_0.out10_42" ], "const_args": [ "/transformer_blocks.19/norm2/Constant_output_0", "/transformer_blocks.19/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.19/norm2/LayerNormalization_output_0.out14_78_bfp.out15_78" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.19/norm2/LayerNormalization_output_0.out14_78_bfp.out15_78" ], "const_args": [ "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_3_existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_4" ], "out_args": [ "/transformer_blocks.19/Add_2_output_0.out0_0_78_bfp.out1_43" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.19/Add_2_output_0.out0_0_78_bfp.out1_43" ], "const_args": [ "onnx::MatMul_8402" ], "out_args": [ "/transformer_blocks.19/ff/net.0/Mul_5_output_0.out17_2_38_bfp.out25_81" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.19/ff/net.0/Mul_5_output_0.out17_2_38_bfp.out25_81" ], "const_args": [ "onnx::MatMul_8403" ], "out_args": [ "/transformer_blocks.19/ff/net.2/Add_output_0.out17_3_163_bfp.out25_83" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.19/ff/net.2/Add_output_0.out17_3_163_bfp.out25_83", "/transformer_blocks.19/Add_output_0.out10_42" ], "const_args": [ "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_5_gma" ], "out_args": [ "/transformer_blocks.19/Add_3_output_0.out10_43" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_19", "type": "SDAdd", "in_args": [ "/transformer_blocks.19/Add_3_output_0.out10_43", "block_controlnet_hidden_states_9.out_35_1_22" ], "const_args": [], "out_args": [ "/Add_19_output_0.out_35_1_23" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.19/attn/Reshape_3_output_0.out22_19_bfp.out27_0_19" ], "const_args": [ "onnx::MatMul_8401" ], "out_args": [ "/transformer_blocks.19/attn/to_add_out/Add_output_0.out6_1_39_bfp.out7_39" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.19/attn/to_add_out/Add_output_0.out6_1_39_bfp.out7_39", "/transformer_blocks.18/Add_7_output_0.out10_41" ], "const_args": [ "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_2_gma" ], "out_args": [ "/transformer_blocks.19/Add_4_output_0.out10_44" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.19/Add_4_output_0.out10_44" ], "const_args": [ "/transformer_blocks.19/norm2_context/Constant_output_0", "/transformer_blocks.19/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.19/norm2_context/LayerNormalization_output_0.out14_79_bfp.out15_79" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.19/norm2_context/LayerNormalization_output_0.out14_79_bfp.out15_79" ], "const_args": [ "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_3_existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_4" ], "out_args": [ "/transformer_blocks.19/Add_6_output_0.out0_0_79_bfp.out1_45" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.19/Add_6_output_0.out0_0_79_bfp.out1_45" ], "const_args": [ "onnx::MatMul_8404" ], "out_args": [ "/transformer_blocks.19/ff_context/net.0/Mul_5_output_0.out17_2_39_bfp.out25_82" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.19/ff_context/net.0/Mul_5_output_0.out17_2_39_bfp.out25_82" ], "const_args": [ "onnx::MatMul_8405" ], "out_args": [ "/transformer_blocks.19/ff_context/net.2/Add_output_0.out17_3_164_bfp.out25_84" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.19/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.19/ff_context/net.2/Add_output_0.out17_3_164_bfp.out25_84", "/transformer_blocks.19/Add_4_output_0.out10_44" ], "const_args": [ "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_5_gma" ], "out_args": [ "/transformer_blocks.19/Add_7_output_0.out10_45" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_19_output_0.out_35_1_23" ], "const_args": [ "/transformer_blocks.20/norm1/norm/Constant_output_0", "/transformer_blocks.20/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.20/norm1/norm/LayerNormalization_output_0.out14_81_bfp.out15_81" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.19/Add_7_output_0.out10_45" ], "const_args": [ "/transformer_blocks.20/norm1_context/norm/Constant_output_0", "/transformer_blocks.20/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.20/norm1_context/norm/LayerNormalization_output_0.out14_80_bfp.out15_80" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.20/norm1/norm/LayerNormalization_output_0.out14_81_bfp.out15_81" ], "const_args": [ "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_0_existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_1" ], "out_args": [ "/transformer_blocks.20/norm1/Add_2_output_0.out0_0_81_bfp.out1_50" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.20/norm1_context/norm/LayerNormalization_output_0.out14_80_bfp.out15_80" ], "const_args": [ "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_0_existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_1" ], "out_args": [ "/transformer_blocks.20/norm1_context/Add_2_output_0.out0_0_80_bfp.out1_52" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.20/norm1_context/Add_2_output_0.out0_0_80_bfp.out1_52", "/transformer_blocks.20/norm1/Add_2_output_0.out0_0_81_bfp.out1_50" ], "const_args": [ "onnx::MatMul_8409_onnx::MatMul_8406" ], "out_args": [ "/transformer_blocks.20/attn/Concat_output_0.out22_20_bfp.out23_60" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.20/norm1_context/Add_2_output_0.out0_0_80_bfp.out1_52", "/transformer_blocks.20/norm1/Add_2_output_0.out0_0_81_bfp.out1_50" ], "const_args": [ "onnx::MatMul_8410_onnx::MatMul_8407" ], "out_args": [ "/transformer_blocks.20/attn/Concat_1_output_0.out22_20_bfp.out23_61" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.20/norm1_context/Add_2_output_0.out0_0_80_bfp.out1_52", "/transformer_blocks.20/norm1/Add_2_output_0.out0_0_81_bfp.out1_50" ], "const_args": [ "onnx::MatMul_8411_onnx::MatMul_8408" ], "out_args": [ "/transformer_blocks.20/attn/Concat_2_output_0.out22_20_bfp.out23_62" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_20", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.20/attn/Concat_output_0.out22_20_bfp.out23_60", "/transformer_blocks.20/attn/Concat_1_output_0.out22_20_bfp.out23_61", "/transformer_blocks.20/attn/Concat_2_output_0.out22_20_bfp.out23_62" ], "const_args": [], "out_args": [ "/transformer_blocks.20/attn/Reshape_3_output_0.out22_20_bfp.out27_0_20" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.20/attn/Reshape_3_output_0.out22_20_bfp.out27_0_20" ], "const_args": [ "onnx::MatMul_8426" ], "out_args": [ "/transformer_blocks.20/attn/to_add_out/Add_output_0.out6_1_41_bfp.out7_41" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.20/attn/Reshape_3_output_0.out22_20_bfp.out27_0_20" ], "const_args": [ "onnx::MatMul_8425" ], "out_args": [ "/transformer_blocks.20/attn/to_out.0/Add_output_0.out6_1_40_bfp.out7_40" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.20/attn/to_out.0/Add_output_0.out6_1_40_bfp.out7_40", "/Add_19_output_0.out_35_1_23" ], "const_args": [ "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_2_gma" ], "out_args": [ "/transformer_blocks.20/Add_output_0.out10_50" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.20/Add_output_0.out10_50" ], "const_args": [ "/transformer_blocks.20/norm2/Constant_output_0", "/transformer_blocks.20/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.20/norm2/LayerNormalization_output_0.out14_82_bfp.out15_82" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.20/norm2/LayerNormalization_output_0.out14_82_bfp.out15_82" ], "const_args": [ "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_3_existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_4" ], "out_args": [ "/transformer_blocks.20/Add_2_output_0.out0_0_82_bfp.out1_51" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.20/Add_2_output_0.out0_0_82_bfp.out1_51" ], "const_args": [ "onnx::MatMul_8427" ], "out_args": [ "/transformer_blocks.20/ff/net.0/Mul_5_output_0.out17_2_40_bfp.out25_85" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.20/ff/net.0/Mul_5_output_0.out17_2_40_bfp.out25_85" ], "const_args": [ "onnx::MatMul_8428" ], "out_args": [ "/transformer_blocks.20/ff/net.2/Add_output_0.out17_3_171_bfp.out25_87" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.20/ff/net.2/Add_output_0.out17_3_171_bfp.out25_87", "/transformer_blocks.20/Add_output_0.out10_50" ], "const_args": [ "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_5_gma" ], "out_args": [ "/transformer_blocks.20/Add_3_output_0.out10_51" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.20/attn/to_add_out/Add_output_0.out6_1_41_bfp.out7_41", "/transformer_blocks.19/Add_7_output_0.out10_45" ], "const_args": [ "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_2_gma" ], "out_args": [ "/transformer_blocks.20/Add_4_output_0.out10_52" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.20/Add_4_output_0.out10_52" ], "const_args": [ "/transformer_blocks.20/norm2_context/Constant_output_0", "/transformer_blocks.20/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.20/norm2_context/LayerNormalization_output_0.out14_83_bfp.out15_83" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.20/norm2_context/LayerNormalization_output_0.out14_83_bfp.out15_83" ], "const_args": [ "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_3_existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_4" ], "out_args": [ "/transformer_blocks.20/Add_6_output_0.out0_0_83_bfp.out1_53" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.20/Add_6_output_0.out0_0_83_bfp.out1_53" ], "const_args": [ "onnx::MatMul_8429" ], "out_args": [ "/transformer_blocks.20/ff_context/net.0/Mul_5_output_0.out17_2_41_bfp.out25_86" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.20/ff_context/net.0/Mul_5_output_0.out17_2_41_bfp.out25_86" ], "const_args": [ "onnx::MatMul_8430" ], "out_args": [ "/transformer_blocks.20/ff_context/net.2/Add_output_0.out17_3_172_bfp.out25_88" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.20/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.20/ff_context/net.2/Add_output_0.out17_3_172_bfp.out25_88", "/transformer_blocks.20/Add_4_output_0.out10_52" ], "const_args": [ "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_5_gma" ], "out_args": [ "/transformer_blocks.20/Add_7_output_0.out10_53" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.20/Add_7_output_0.out10_53" ], "const_args": [ "/transformer_blocks.21/norm1_context/norm/Constant_output_0", "/transformer_blocks.21/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.21/norm1_context/norm/LayerNormalization_output_0.out14_84_bfp.out15_84" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_20", "type": "SDAdd", "in_args": [ "/transformer_blocks.20/Add_3_output_0.out10_51", "block_controlnet_hidden_states_10.out_35_1_24" ], "const_args": [], "out_args": [ "/Add_20_output_0.out_35_1_24" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_20_output_0.out_35_1_24" ], "const_args": [ "/transformer_blocks.21/norm1/norm/Constant_output_0", "/transformer_blocks.21/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.21/norm1/norm/LayerNormalization_output_0.out14_85_bfp.out15_85" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.21/norm1/norm/LayerNormalization_output_0.out14_85_bfp.out15_85" ], "const_args": [ "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_0_existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_1" ], "out_args": [ "/transformer_blocks.21/norm1/Add_2_output_0.out0_0_85_bfp.out1_54" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.21/norm1_context/norm/LayerNormalization_output_0.out14_84_bfp.out15_84" ], "const_args": [ "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_0_existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_1" ], "out_args": [ "/transformer_blocks.21/norm1_context/Add_2_output_0.out0_0_84_bfp.out1_56" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.21/norm1_context/Add_2_output_0.out0_0_84_bfp.out1_56", "/transformer_blocks.21/norm1/Add_2_output_0.out0_0_85_bfp.out1_54" ], "const_args": [ "onnx::MatMul_8434_onnx::MatMul_8431" ], "out_args": [ "/transformer_blocks.21/attn/Concat_output_0.out22_21_bfp.out23_63" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.21/norm1_context/Add_2_output_0.out0_0_84_bfp.out1_56", "/transformer_blocks.21/norm1/Add_2_output_0.out0_0_85_bfp.out1_54" ], "const_args": [ "onnx::MatMul_8435_onnx::MatMul_8432" ], "out_args": [ "/transformer_blocks.21/attn/Concat_1_output_0.out22_21_bfp.out23_64" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.21/norm1_context/Add_2_output_0.out0_0_84_bfp.out1_56", "/transformer_blocks.21/norm1/Add_2_output_0.out0_0_85_bfp.out1_54" ], "const_args": [ "onnx::MatMul_8436_onnx::MatMul_8433" ], "out_args": [ "/transformer_blocks.21/attn/Concat_2_output_0.out22_21_bfp.out23_65" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_21", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.21/attn/Concat_output_0.out22_21_bfp.out23_63", "/transformer_blocks.21/attn/Concat_1_output_0.out22_21_bfp.out23_64", "/transformer_blocks.21/attn/Concat_2_output_0.out22_21_bfp.out23_65" ], "const_args": [], "out_args": [ "/transformer_blocks.21/attn/Reshape_3_output_0.out22_21_bfp.out27_0_21" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.21/attn/Reshape_3_output_0.out22_21_bfp.out27_0_21" ], "const_args": [ "onnx::MatMul_8450" ], "out_args": [ "/transformer_blocks.21/attn/to_out.0/Add_output_0.out6_1_42_bfp.out7_42" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.21/attn/to_out.0/Add_output_0.out6_1_42_bfp.out7_42", "/Add_20_output_0.out_35_1_24" ], "const_args": [ "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_2_gma" ], "out_args": [ "/transformer_blocks.21/Add_output_0.out10_54" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.21/Add_output_0.out10_54" ], "const_args": [ "/transformer_blocks.21/norm2/Constant_output_0", "/transformer_blocks.21/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.21/norm2/LayerNormalization_output_0.out14_86_bfp.out15_86" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.21/norm2/LayerNormalization_output_0.out14_86_bfp.out15_86" ], "const_args": [ "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_3_existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_4" ], "out_args": [ "/transformer_blocks.21/Add_2_output_0.out0_0_86_bfp.out1_55" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.21/Add_2_output_0.out0_0_86_bfp.out1_55" ], "const_args": [ "onnx::MatMul_8452" ], "out_args": [ "/transformer_blocks.21/ff/net.0/Mul_5_output_0.out17_2_42_bfp.out25_89" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.21/ff/net.0/Mul_5_output_0.out17_2_42_bfp.out25_89" ], "const_args": [ "onnx::MatMul_8453" ], "out_args": [ "/transformer_blocks.21/ff/net.2/Add_output_0.out17_3_179_bfp.out25_91" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.21/ff/net.2/Add_output_0.out17_3_179_bfp.out25_91", "/transformer_blocks.21/Add_output_0.out10_54" ], "const_args": [ "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_5_gma" ], "out_args": [ "/transformer_blocks.21/Add_3_output_0.out10_55" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_21", "type": "SDAdd", "in_args": [ "/transformer_blocks.21/Add_3_output_0.out10_55", "block_controlnet_hidden_states_10.out_35_1_24" ], "const_args": [], "out_args": [ "/Add_21_output_0.out_35_1_25" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.21/attn/Reshape_3_output_0.out22_21_bfp.out27_0_21" ], "const_args": [ "onnx::MatMul_8451" ], "out_args": [ "/transformer_blocks.21/attn/to_add_out/Add_output_0.out6_1_43_bfp.out7_43" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.21/attn/to_add_out/Add_output_0.out6_1_43_bfp.out7_43", "/transformer_blocks.20/Add_7_output_0.out10_53" ], "const_args": [ "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_2_gma" ], "out_args": [ "/transformer_blocks.21/Add_4_output_0.out10_56" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.21/Add_4_output_0.out10_56" ], "const_args": [ "/transformer_blocks.21/norm2_context/Constant_output_0", "/transformer_blocks.21/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.21/norm2_context/LayerNormalization_output_0.out14_87_bfp.out15_87" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.21/norm2_context/LayerNormalization_output_0.out14_87_bfp.out15_87" ], "const_args": [ "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_3_existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_4" ], "out_args": [ "/transformer_blocks.21/Add_6_output_0.out0_0_87_bfp.out1_57" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.21/Add_6_output_0.out0_0_87_bfp.out1_57" ], "const_args": [ "onnx::MatMul_8454" ], "out_args": [ "/transformer_blocks.21/ff_context/net.0/Mul_5_output_0.out17_2_43_bfp.out25_90" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.21/ff_context/net.0/Mul_5_output_0.out17_2_43_bfp.out25_90" ], "const_args": [ "onnx::MatMul_8455" ], "out_args": [ "/transformer_blocks.21/ff_context/net.2/Add_output_0.out17_3_180_bfp.out25_92" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.21/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.21/ff_context/net.2/Add_output_0.out17_3_180_bfp.out25_92", "/transformer_blocks.21/Add_4_output_0.out10_56" ], "const_args": [ "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_5_gma" ], "out_args": [ "/transformer_blocks.21/Add_7_output_0.out10_57" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_21_output_0.out_35_1_25" ], "const_args": [ "/transformer_blocks.22/norm1/norm/Constant_output_0", "/transformer_blocks.22/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.22/norm1/norm/LayerNormalization_output_0.out14_89_bfp.out15_89" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.22/norm1/norm/LayerNormalization_output_0.out14_89_bfp.out15_89" ], "const_args": [ "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_0_existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_1" ], "out_args": [ "/transformer_blocks.22/norm1/Add_2_output_0.out0_0_89_bfp.out1_58" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.21/Add_7_output_0.out10_57" ], "const_args": [ "/transformer_blocks.22/norm1_context/norm/Constant_output_0", "/transformer_blocks.22/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.22/norm1_context/norm/LayerNormalization_output_0.out14_88_bfp.out15_88" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.22/norm1_context/norm/LayerNormalization_output_0.out14_88_bfp.out15_88" ], "const_args": [ "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_0_existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_1" ], "out_args": [ "/transformer_blocks.22/norm1_context/Add_2_output_0.out0_0_88_bfp.out1_60" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.22/norm1_context/Add_2_output_0.out0_0_88_bfp.out1_60", "/transformer_blocks.22/norm1/Add_2_output_0.out0_0_89_bfp.out1_58" ], "const_args": [ "onnx::MatMul_8459_onnx::MatMul_8456" ], "out_args": [ "/transformer_blocks.22/attn/Concat_output_0.out22_22_bfp.out23_66" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.22/norm1_context/Add_2_output_0.out0_0_88_bfp.out1_60", "/transformer_blocks.22/norm1/Add_2_output_0.out0_0_89_bfp.out1_58" ], "const_args": [ "onnx::MatMul_8460_onnx::MatMul_8457" ], "out_args": [ "/transformer_blocks.22/attn/Concat_1_output_0.out22_22_bfp.out23_67" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.22/norm1_context/Add_2_output_0.out0_0_88_bfp.out1_60", "/transformer_blocks.22/norm1/Add_2_output_0.out0_0_89_bfp.out1_58" ], "const_args": [ "onnx::MatMul_8461_onnx::MatMul_8458" ], "out_args": [ "/transformer_blocks.22/attn/Concat_2_output_0.out22_22_bfp.out23_68" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_22", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.22/attn/Concat_output_0.out22_22_bfp.out23_66", "/transformer_blocks.22/attn/Concat_1_output_0.out22_22_bfp.out23_67", "/transformer_blocks.22/attn/Concat_2_output_0.out22_22_bfp.out23_68" ], "const_args": [], "out_args": [ "/transformer_blocks.22/attn/Reshape_3_output_0.out22_22_bfp.out27_0_22" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.22/attn/Reshape_3_output_0.out22_22_bfp.out27_0_22" ], "const_args": [ "onnx::MatMul_8475" ], "out_args": [ "/transformer_blocks.22/attn/to_out.0/Add_output_0.out6_1_44_bfp.out7_44" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.22/attn/to_out.0/Add_output_0.out6_1_44_bfp.out7_44", "/Add_21_output_0.out_35_1_25" ], "const_args": [ "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_2_gma" ], "out_args": [ "/transformer_blocks.22/Add_output_0.out10_58" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.22/Add_output_0.out10_58" ], "const_args": [ "/transformer_blocks.22/norm2/Constant_output_0", "/transformer_blocks.22/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.22/norm2/LayerNormalization_output_0.out14_90_bfp.out15_90" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.22/norm2/LayerNormalization_output_0.out14_90_bfp.out15_90" ], "const_args": [ "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_3_existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_4" ], "out_args": [ "/transformer_blocks.22/Add_2_output_0.out0_0_90_bfp.out1_59" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.22/Add_2_output_0.out0_0_90_bfp.out1_59" ], "const_args": [ "onnx::MatMul_8477" ], "out_args": [ "/transformer_blocks.22/ff/net.0/Mul_5_output_0.out17_2_44_bfp.out25_93" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.22/ff/net.0/Mul_5_output_0.out17_2_44_bfp.out25_93" ], "const_args": [ "onnx::MatMul_8478" ], "out_args": [ "/transformer_blocks.22/ff/net.2/Add_output_0.out17_3_187_bfp.out25_95" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.22/ff/net.2/Add_output_0.out17_3_187_bfp.out25_95", "/transformer_blocks.22/Add_output_0.out10_58" ], "const_args": [ "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_5_gma" ], "out_args": [ "/transformer_blocks.22/Add_3_output_0.out10_59" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.22/attn/Reshape_3_output_0.out22_22_bfp.out27_0_22" ], "const_args": [ "onnx::MatMul_8476" ], "out_args": [ "/transformer_blocks.22/attn/to_add_out/Add_output_0.out6_1_45_bfp.out7_45" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "str", "value": [ "state_dim1" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.22/attn/to_add_out/Add_output_0.out6_1_45_bfp.out7_45", "/transformer_blocks.21/Add_7_output_0.out10_57" ], "const_args": [ "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_2_gma" ], "out_args": [ "/transformer_blocks.22/Add_4_output_0.out10_60" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.22/Add_4_output_0.out10_60" ], "const_args": [ "/transformer_blocks.22/norm2_context/Constant_output_0", "/transformer_blocks.22/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.22/norm2_context/LayerNormalization_output_0.out14_91_bfp.out15_91" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.22/norm2_context/LayerNormalization_output_0.out14_91_bfp.out15_91" ], "const_args": [ "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_3_existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_4" ], "out_args": [ "/transformer_blocks.22/Add_6_output_0.out0_0_91_bfp.out1_61" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.22/Add_6_output_0.out0_0_91_bfp.out1_61" ], "const_args": [ "onnx::MatMul_8479" ], "out_args": [ "/transformer_blocks.22/ff_context/net.0/Mul_5_output_0.out17_2_45_bfp.out25_94" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.22/ff_context/net.0/Mul_5_output_0.out17_2_45_bfp.out25_94" ], "const_args": [ "onnx::MatMul_8480" ], "out_args": [ "/transformer_blocks.22/ff_context/net.2/Add_output_0.out17_3_188_bfp.out25_96" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.22/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.22/ff_context/net.2/Add_output_0.out17_3_188_bfp.out25_96", "/transformer_blocks.22/Add_4_output_0.out10_60" ], "const_args": [ "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_5_gma" ], "out_args": [ "/transformer_blocks.22/Add_7_output_0.out10_61" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.22/Add_7_output_0.out10_61" ], "const_args": [ "/transformer_blocks.23/norm1_context/norm/Constant_output_0", "/transformer_blocks.23/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.23/norm1_context/norm/LayerNormalization_output_0.out14_92_bfp.out15_92" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add_22", "type": "SDAdd", "in_args": [ "/transformer_blocks.22/Add_3_output_0.out10_59", "block_controlnet_hidden_states_11.out_35_1_26" ], "const_args": [], "out_args": [ "/Add_22_output_0.out_35_1_26" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_22_output_0.out_35_1_26" ], "const_args": [ "/transformer_blocks.23/norm1/norm/Constant_output_0", "/transformer_blocks.23/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.23/norm1/norm/LayerNormalization_output_0.out14_93_bfp.out15_93" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.23/norm1/norm/LayerNormalization_output_0.out14_93_bfp.out15_93" ], "const_args": [ "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_0_existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_1" ], "out_args": [ "/transformer_blocks.23/norm1/Add_2_output_0.out0_0_93_bfp.out1_62" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.23/norm1_context/norm/LayerNormalization_output_0.out14_92_bfp.out15_92" ], "const_args": [ "existing_model.transformer_blocks.23.norm1_context.linear.weight_5_1_51_27_47_0_existing_model.transformer_blocks.23.norm1_context.linear.weight_5_1_51_27_47_1" ], "out_args": [ "/transformer_blocks.23/norm1_context/Add_2_output_0.out0_0_92_bfp.out1_64" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.23/norm1_context/Add_2_output_0.out0_0_92_bfp.out1_64", "/transformer_blocks.23/norm1/Add_2_output_0.out0_0_93_bfp.out1_62" ], "const_args": [ "onnx::MatMul_8484_onnx::MatMul_8481" ], "out_args": [ "/transformer_blocks.23/attn/Concat_output_0.out22_23_bfp.out23_69" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.23/norm1_context/Add_2_output_0.out0_0_92_bfp.out1_64", "/transformer_blocks.23/norm1/Add_2_output_0.out0_0_93_bfp.out1_62" ], "const_args": [ "onnx::MatMul_8485_onnx::MatMul_8482" ], "out_args": [ "/transformer_blocks.23/attn/Concat_1_output_0.out22_23_bfp.out23_70" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.23/norm1_context/Add_2_output_0.out0_0_92_bfp.out1_64", "/transformer_blocks.23/norm1/Add_2_output_0.out0_0_93_bfp.out1_62" ], "const_args": [ "onnx::MatMul_8486_onnx::MatMul_8483" ], "out_args": [ "/transformer_blocks.23/attn/Concat_2_output_0.out22_23_bfp.out23_71" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_23", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.23/attn/Concat_output_0.out22_23_bfp.out23_69", "/transformer_blocks.23/attn/Concat_1_output_0.out22_23_bfp.out23_70", "/transformer_blocks.23/attn/Concat_2_output_0.out22_23_bfp.out23_71" ], "const_args": [], "out_args": [ "/transformer_blocks.23/attn/Reshape_3_output_0.out22_23_bfp.out27_0_23" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "64", "max_length + state_dim1" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.23/attn/Reshape_3_output_0.out22_23_bfp.out27_0_23" ], "const_args": [ "onnx::MatMul_8497" ], "out_args": [ "/transformer_blocks.23/attn/to_out.0/Add_output_0.out6_1_46_bfp.out7_46" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + state_dim1", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "state_dim1" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.23/attn/to_out.0/Add_output_0.out6_1_46_bfp.out7_46", "/Add_22_output_0.out_35_1_26" ], "const_args": [ "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_2_gma" ], "out_args": [ "/transformer_blocks.23/Add_output_0.out10_62" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.23/Add_output_0.out10_62" ], "const_args": [ "/transformer_blocks.23/norm2/Constant_output_0", "/transformer_blocks.23/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.23/norm2/LayerNormalization_output_0.out14_94_bfp.out15_94" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.23/norm2/LayerNormalization_output_0.out14_94_bfp.out15_94" ], "const_args": [ "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_3_existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_4" ], "out_args": [ "/transformer_blocks.23/Add_2_output_0.out0_0_94_bfp.out1_63" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.23/Add_2_output_0.out0_0_94_bfp.out1_63" ], "const_args": [ "onnx::MatMul_8498" ], "out_args": [ "/transformer_blocks.23/ff/net.0/Mul_5_output_0.out17_2_46_bfp.out25_97" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.23/ff/net.0/Mul_5_output_0.out17_2_46_bfp.out25_97" ], "const_args": [ "onnx::MatMul_8499" ], "out_args": [ "/transformer_blocks.23/ff/net.2/Add_output_0.out17_3_195_bfp.out25_98" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.23/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.23/ff/net.2/Add_output_0.out17_3_195_bfp.out25_98", "/transformer_blocks.23/Add_output_0.out10_62" ], "const_args": [ "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_5_gma" ], "out_args": [ "/transformer_blocks.23/Add_3_output_0.out10_63" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/norm_out/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.23/Add_3_output_0.out10_63" ], "const_args": [ "/norm_out/norm/Constant_output_0", "/norm_out/norm/Constant_1_output_0" ], "out_args": [ "/norm_out/norm/LayerNormalization_output_0.out14_95_bfp.out15_95" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/norm_out/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/norm_out/norm/LayerNormalization_output_0.out14_95_bfp.out15_95" ], "const_args": [ "existing_model.norm_out.linear.weight_5_1_52_27_48_0_existing_model.norm_out.linear.weight_5_1_52_27_48_1" ], "out_args": [ "/norm_out/Add_2_output_0.out0_0_95_bfp.out1_93" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/norm_out/Add_2_output_0.out0_0_95_bfp.out1_93_SDCastBfp2Bf", "type": "SDCastBfp2Bf", "in_args": [ "/norm_out/Add_2_output_0.out0_0_95_bfp.out1_93" ], "const_args": [ "/norm_out/Add_2_output_0.out0_0_95_bfp.out1_93_bfp.wts" ], "out_args": [ "/norm_out/Add_2_output_0.out0_0_95" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/proj_out/MatMul", "type": "SDGemm", "in_args": [ "/norm_out/Add_2_output_0.out0_0_95" ], "const_args": [ "onnx::MatMul_8500" ], "out_args": [ "/Reshape_output_0.out17_0_0" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "state_dim1", "64" ] }, "weight_shape": { "type": "int", "value": [ "1536", "64" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } } ], "fused_tensors": { "in": { "buffer_size": 86560, "xrt_arg_id": 0, "packed_tensors": [ "hidden_states_nhwc.out5_0_0", "/pos_embed/Reshape_1_output_0.out_35_1_2", "/time_text_embed/Cast_output_0.out17_3_3", "pooled_projections.out17_3_1", "encoder_hidden_states.out17_3_0", "block_controlnet_hidden_states_0.out_35_1_4", "block_controlnet_hidden_states_1.out_35_1_6", "block_controlnet_hidden_states_2.out_35_1_8", "block_controlnet_hidden_states_3.out_35_1_10", "block_controlnet_hidden_states_4.out_35_1_12", "block_controlnet_hidden_states_5.out_35_1_14", "block_controlnet_hidden_states_6.out_35_1_16", "block_controlnet_hidden_states_7.out_35_1_18", "block_controlnet_hidden_states_8.out_35_1_20", "block_controlnet_hidden_states_9.out_35_1_22", "block_controlnet_hidden_states_10.out_35_1_24", "block_controlnet_hidden_states_11.out_35_1_26" ] }, "out": { "buffer_size": 128, "xrt_arg_id": 1, "packed_tensors": [ "/Reshape_output_0.out17_0_0" ] }, "scratch": { "buffer_size": 1277440, "xrt_arg_id": 2, "packed_tensors": [ "/pos_embed/Transpose_output_0.out5_0_0", "/pos_embed/Add_2_output_0.out_35_1_2", "/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3", "/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1", "/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4", "/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1", "/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0", "/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2", "/time_text_embed/Add_output_0.out_35_1_3", "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "encoder_hidden_states.out17_3_0_bfp.out25_0", "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0", "/context_embedder/Add_output_0.out17_3_0", "/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0", "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0", "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1", "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_94", "/transformer_blocks.0/attn/Concat_output_0.out22_0_bfp.out23_0", "/transformer_blocks.0/attn/Concat_1_output_0.out22_0_bfp.out23_1", "/transformer_blocks.0/attn/Concat_2_output_0.out22_0_bfp.out23_2", "/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0", "/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1", "/transformer_blocks.0/Add_4_output_0.out10_0", "/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3", "/transformer_blocks.0/Add_6_output_0.out0_0_3_bfp.out1_1", "/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_6", "/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_12_bfp.out25_8", "/transformer_blocks.0/Add_7_output_0.out10_1", "/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0", "/transformer_blocks.0/Add_output_0.out10_92", "/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2", "/transformer_blocks.0/Add_2_output_0.out0_0_2_bfp.out1_95", "/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_5", "/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_11_bfp.out25_7", "/transformer_blocks.0/Add_3_output_0.out10_93", "/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_4_bfp.out15_4", "/Add_output_0.out_35_1_4", "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_5_bfp.out15_5", "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_5_bfp.out1_2", "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_4_bfp.out1_4", "/transformer_blocks.1/attn/Concat_output_0.out22_1_bfp.out23_3", "/transformer_blocks.1/attn/Concat_1_output_0.out22_1_bfp.out23_4", "/transformer_blocks.1/attn/Concat_2_output_0.out22_1_bfp.out23_5", "/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1", "/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2", "/transformer_blocks.1/Add_output_0.out10_2", "/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6", "/transformer_blocks.1/Add_2_output_0.out0_0_6_bfp.out1_3", "/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_9", "/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_19_bfp.out25_11", "/transformer_blocks.1/Add_3_output_0.out10_3", "/Add_1_output_0.out_35_1_5", "/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3", "/transformer_blocks.1/Add_4_output_0.out10_4", "/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7", "/transformer_blocks.1/Add_6_output_0.out0_0_7_bfp.out1_5", "/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_10", "/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_20_bfp.out25_12", "/transformer_blocks.1/Add_7_output_0.out10_5", "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_9_bfp.out15_9", "/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_8_bfp.out15_8", "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_9_bfp.out1_46", "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_8_bfp.out1_48", "/transformer_blocks.2/attn/Concat_output_0.out22_2_bfp.out23_6", "/transformer_blocks.2/attn/Concat_1_output_0.out22_2_bfp.out23_7", "/transformer_blocks.2/attn/Concat_2_output_0.out22_2_bfp.out23_8", "/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2", "/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5", "/transformer_blocks.2/Add_4_output_0.out10_48", "/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4", "/transformer_blocks.2/Add_output_0.out10_46", "/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10", "/transformer_blocks.2/Add_2_output_0.out0_0_10_bfp.out1_47", "/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_13", "/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_27_bfp.out25_15", "/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11", "/transformer_blocks.2/Add_6_output_0.out0_0_11_bfp.out1_49", "/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_14", "/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_28_bfp.out25_16", "/transformer_blocks.2/Add_7_output_0.out10_49", "/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_12_bfp.out15_12", "/transformer_blocks.2/Add_3_output_0.out10_47", "/Add_2_output_0.out_35_1_6", "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_13_bfp.out15_13", "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_13_bfp.out1_65", "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_12_bfp.out1_67", "/transformer_blocks.3/attn/Concat_output_0.out22_3_bfp.out23_9", "/transformer_blocks.3/attn/Concat_1_output_0.out22_3_bfp.out23_10", "/transformer_blocks.3/attn/Concat_2_output_0.out22_3_bfp.out23_11", "/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3", "/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7", "/transformer_blocks.3/Add_4_output_0.out10_66", "/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6", "/transformer_blocks.3/Add_output_0.out10_64", "/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14", "/transformer_blocks.3/Add_2_output_0.out0_0_14_bfp.out1_66", "/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_17", "/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_35_bfp.out25_19", "/transformer_blocks.3/Add_3_output_0.out10_65", "/Add_3_output_0.out_35_1_7", "/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15", "/transformer_blocks.3/Add_6_output_0.out0_0_15_bfp.out1_68", "/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_18", "/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_36_bfp.out25_20", "/transformer_blocks.3/Add_7_output_0.out10_67", "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_17_bfp.out15_17", "/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_16_bfp.out15_16", "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_17_bfp.out1_69", "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_16_bfp.out1_71", "/transformer_blocks.4/attn/Concat_output_0.out22_4_bfp.out23_12", "/transformer_blocks.4/attn/Concat_1_output_0.out22_4_bfp.out23_13", "/transformer_blocks.4/attn/Concat_2_output_0.out22_4_bfp.out23_14", "/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4", "/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9", "/transformer_blocks.4/Add_4_output_0.out10_70", "/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8", "/transformer_blocks.4/Add_output_0.out10_68", "/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18", "/transformer_blocks.4/Add_2_output_0.out0_0_18_bfp.out1_70", "/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_21", "/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_43_bfp.out25_23", "/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19", "/transformer_blocks.4/Add_6_output_0.out0_0_19_bfp.out1_72", "/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_22", "/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_44_bfp.out25_24", "/transformer_blocks.4/Add_7_output_0.out10_71", "/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_20_bfp.out15_20", "/transformer_blocks.4/Add_3_output_0.out10_69", "/Add_4_output_0.out_35_1_8", "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_21_bfp.out15_21", "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_21_bfp.out1_73", "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_20_bfp.out1_75", "/transformer_blocks.5/attn/Concat_output_0.out22_5_bfp.out23_15", "/transformer_blocks.5/attn/Concat_1_output_0.out22_5_bfp.out23_16", "/transformer_blocks.5/attn/Concat_2_output_0.out22_5_bfp.out23_17", "/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5", "/transformer_blocks.5/attn/to_add_out/Add_output_0.out6_1_11_bfp.out7_11", "/transformer_blocks.5/Add_4_output_0.out10_74", "/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10", "/transformer_blocks.5/Add_output_0.out10_72", "/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22", "/transformer_blocks.5/Add_2_output_0.out0_0_22_bfp.out1_74", "/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_25", "/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_51_bfp.out25_27", "/transformer_blocks.5/Add_3_output_0.out10_73", "/Add_5_output_0.out_35_1_9", "/transformer_blocks.5/norm2_context/LayerNormalization_output_0.out14_23_bfp.out15_23", "/transformer_blocks.5/Add_6_output_0.out0_0_23_bfp.out1_76", "/transformer_blocks.5/ff_context/net.0/Mul_5_output_0.out17_2_11_bfp.out25_26", "/transformer_blocks.5/ff_context/net.2/Add_output_0.out17_3_52_bfp.out25_28", "/transformer_blocks.5/Add_7_output_0.out10_75", "/transformer_blocks.6/norm1/norm/LayerNormalization_output_0.out14_25_bfp.out15_25", "/transformer_blocks.6/norm1_context/norm/LayerNormalization_output_0.out14_24_bfp.out15_24", "/transformer_blocks.6/norm1/Add_2_output_0.out0_0_25_bfp.out1_77", "/transformer_blocks.6/norm1_context/Add_2_output_0.out0_0_24_bfp.out1_79", "/transformer_blocks.6/attn/Concat_output_0.out22_6_bfp.out23_18", "/transformer_blocks.6/attn/Concat_1_output_0.out22_6_bfp.out23_19", "/transformer_blocks.6/attn/Concat_2_output_0.out22_6_bfp.out23_20", "/transformer_blocks.6/attn/Reshape_3_output_0.out22_6_bfp.out27_0_6", "/transformer_blocks.6/attn/to_add_out/Add_output_0.out6_1_13_bfp.out7_13", "/transformer_blocks.6/Add_4_output_0.out10_78", "/transformer_blocks.6/attn/to_out.0/Add_output_0.out6_1_12_bfp.out7_12", "/transformer_blocks.6/Add_output_0.out10_76", "/transformer_blocks.6/norm2/LayerNormalization_output_0.out14_26_bfp.out15_26", "/transformer_blocks.6/Add_2_output_0.out0_0_26_bfp.out1_78", "/transformer_blocks.6/ff/net.0/Mul_5_output_0.out17_2_12_bfp.out25_29", "/transformer_blocks.6/ff/net.2/Add_output_0.out17_3_59_bfp.out25_31", "/transformer_blocks.6/norm2_context/LayerNormalization_output_0.out14_27_bfp.out15_27", "/transformer_blocks.6/Add_6_output_0.out0_0_27_bfp.out1_80", "/transformer_blocks.6/ff_context/net.0/Mul_5_output_0.out17_2_13_bfp.out25_30", "/transformer_blocks.6/ff_context/net.2/Add_output_0.out17_3_60_bfp.out25_32", "/transformer_blocks.6/Add_7_output_0.out10_79", "/transformer_blocks.7/norm1_context/norm/LayerNormalization_output_0.out14_28_bfp.out15_28", "/transformer_blocks.6/Add_3_output_0.out10_77", "/Add_6_output_0.out_35_1_10", "/transformer_blocks.7/norm1/norm/LayerNormalization_output_0.out14_29_bfp.out15_29", "/transformer_blocks.7/norm1/Add_2_output_0.out0_0_29_bfp.out1_81", "/transformer_blocks.7/norm1_context/Add_2_output_0.out0_0_28_bfp.out1_83", "/transformer_blocks.7/attn/Concat_output_0.out22_7_bfp.out23_21", "/transformer_blocks.7/attn/Concat_1_output_0.out22_7_bfp.out23_22", "/transformer_blocks.7/attn/Concat_2_output_0.out22_7_bfp.out23_23", "/transformer_blocks.7/attn/Reshape_3_output_0.out22_7_bfp.out27_0_7", "/transformer_blocks.7/attn/to_add_out/Add_output_0.out6_1_15_bfp.out7_15", "/transformer_blocks.7/Add_4_output_0.out10_82", "/transformer_blocks.7/attn/to_out.0/Add_output_0.out6_1_14_bfp.out7_14", "/transformer_blocks.7/Add_output_0.out10_80", "/transformer_blocks.7/norm2/LayerNormalization_output_0.out14_30_bfp.out15_30", "/transformer_blocks.7/Add_2_output_0.out0_0_30_bfp.out1_82", "/transformer_blocks.7/ff/net.0/Mul_5_output_0.out17_2_14_bfp.out25_33", "/transformer_blocks.7/ff/net.2/Add_output_0.out17_3_67_bfp.out25_35", "/transformer_blocks.7/Add_3_output_0.out10_81", "/Add_7_output_0.out_35_1_11", "/transformer_blocks.7/norm2_context/LayerNormalization_output_0.out14_31_bfp.out15_31", "/transformer_blocks.7/Add_6_output_0.out0_0_31_bfp.out1_84", "/transformer_blocks.7/ff_context/net.0/Mul_5_output_0.out17_2_15_bfp.out25_34", "/transformer_blocks.7/ff_context/net.2/Add_output_0.out17_3_68_bfp.out25_36", "/transformer_blocks.7/Add_7_output_0.out10_83", "/transformer_blocks.8/norm1/norm/LayerNormalization_output_0.out14_33_bfp.out15_33", "/transformer_blocks.8/norm1_context/norm/LayerNormalization_output_0.out14_32_bfp.out15_32", "/transformer_blocks.8/norm1/Add_2_output_0.out0_0_33_bfp.out1_85", "/transformer_blocks.8/norm1_context/Add_2_output_0.out0_0_32_bfp.out1_87", "/transformer_blocks.8/attn/Concat_output_0.out22_8_bfp.out23_24", "/transformer_blocks.8/attn/Concat_1_output_0.out22_8_bfp.out23_25", "/transformer_blocks.8/attn/Concat_2_output_0.out22_8_bfp.out23_26", "/transformer_blocks.8/attn/Reshape_3_output_0.out22_8_bfp.out27_0_8", "/transformer_blocks.8/attn/to_add_out/Add_output_0.out6_1_17_bfp.out7_17", "/transformer_blocks.8/Add_4_output_0.out10_86", "/transformer_blocks.8/attn/to_out.0/Add_output_0.out6_1_16_bfp.out7_16", "/transformer_blocks.8/Add_output_0.out10_84", "/transformer_blocks.8/norm2/LayerNormalization_output_0.out14_34_bfp.out15_34", "/transformer_blocks.8/Add_2_output_0.out0_0_34_bfp.out1_86", "/transformer_blocks.8/ff/net.0/Mul_5_output_0.out17_2_16_bfp.out25_37", "/transformer_blocks.8/ff/net.2/Add_output_0.out17_3_75_bfp.out25_39", "/transformer_blocks.8/norm2_context/LayerNormalization_output_0.out14_35_bfp.out15_35", "/transformer_blocks.8/Add_6_output_0.out0_0_35_bfp.out1_88", "/transformer_blocks.8/ff_context/net.0/Mul_5_output_0.out17_2_17_bfp.out25_38", "/transformer_blocks.8/ff_context/net.2/Add_output_0.out17_3_76_bfp.out25_40", "/transformer_blocks.8/Add_7_output_0.out10_87", "/transformer_blocks.9/norm1_context/norm/LayerNormalization_output_0.out14_36_bfp.out15_36", "/transformer_blocks.8/Add_3_output_0.out10_85", "/Add_8_output_0.out_35_1_12", "/transformer_blocks.9/norm1/norm/LayerNormalization_output_0.out14_37_bfp.out15_37", "/transformer_blocks.9/norm1/Add_2_output_0.out0_0_37_bfp.out1_89", "/transformer_blocks.9/norm1_context/Add_2_output_0.out0_0_36_bfp.out1_91", "/transformer_blocks.9/attn/Concat_output_0.out22_9_bfp.out23_27", "/transformer_blocks.9/attn/Concat_1_output_0.out22_9_bfp.out23_28", "/transformer_blocks.9/attn/Concat_2_output_0.out22_9_bfp.out23_29", "/transformer_blocks.9/attn/Reshape_3_output_0.out22_9_bfp.out27_0_9", "/transformer_blocks.9/attn/to_add_out/Add_output_0.out6_1_19_bfp.out7_19", "/transformer_blocks.9/Add_4_output_0.out10_90", "/transformer_blocks.9/attn/to_out.0/Add_output_0.out6_1_18_bfp.out7_18", "/transformer_blocks.9/Add_output_0.out10_88", "/transformer_blocks.9/norm2/LayerNormalization_output_0.out14_38_bfp.out15_38", "/transformer_blocks.9/Add_2_output_0.out0_0_38_bfp.out1_90", "/transformer_blocks.9/ff/net.0/Mul_5_output_0.out17_2_18_bfp.out25_41", "/transformer_blocks.9/ff/net.2/Add_output_0.out17_3_83_bfp.out25_43", "/transformer_blocks.9/Add_3_output_0.out10_89", "/Add_9_output_0.out_35_1_13", "/transformer_blocks.10/norm1/norm/LayerNormalization_output_0.out14_40_bfp.out15_40", "/transformer_blocks.10/norm1/Add_2_output_0.out0_0_40_bfp.out1_6", "/transformer_blocks.9/norm2_context/LayerNormalization_output_0.out14_39_bfp.out15_39", "/transformer_blocks.9/Add_6_output_0.out0_0_39_bfp.out1_92", "/transformer_blocks.9/ff_context/net.0/Mul_5_output_0.out17_2_19_bfp.out25_42", "/transformer_blocks.9/ff_context/net.2/Add_output_0.out17_3_84_bfp.out25_44", "/transformer_blocks.9/Add_7_output_0.out10_91", "/transformer_blocks.10/norm1_context/norm/LayerNormalization_output_0.out14_41_bfp.out15_41", "/transformer_blocks.10/norm1_context/Add_2_output_0.out0_0_41_bfp.out1_8", "/transformer_blocks.10/attn/Concat_output_0.out22_10_bfp.out23_30", "/transformer_blocks.10/attn/Concat_1_output_0.out22_10_bfp.out23_31", "/transformer_blocks.10/attn/Concat_2_output_0.out22_10_bfp.out23_32", "/transformer_blocks.10/attn/Reshape_3_output_0.out22_10_bfp.out27_0_10", "/transformer_blocks.10/attn/to_out.0/Add_output_0.out6_1_20_bfp.out7_20", "/transformer_blocks.10/Add_output_0.out10_6", "/transformer_blocks.10/norm2/LayerNormalization_output_0.out14_42_bfp.out15_42", "/transformer_blocks.10/Add_2_output_0.out0_0_42_bfp.out1_7", "/transformer_blocks.10/ff/net.0/Mul_5_output_0.out17_2_20_bfp.out25_45", "/transformer_blocks.10/ff/net.2/Add_output_0.out17_3_91_bfp.out25_47", "/transformer_blocks.10/Add_3_output_0.out10_7", "/transformer_blocks.10/attn/to_add_out/Add_output_0.out6_1_21_bfp.out7_21", "/transformer_blocks.10/Add_4_output_0.out10_8", "/transformer_blocks.10/norm2_context/LayerNormalization_output_0.out14_43_bfp.out15_43", "/transformer_blocks.10/Add_6_output_0.out0_0_43_bfp.out1_9", "/transformer_blocks.10/ff_context/net.0/Mul_5_output_0.out17_2_21_bfp.out25_46", "/transformer_blocks.10/ff_context/net.2/Add_output_0.out17_3_92_bfp.out25_48", "/transformer_blocks.10/Add_7_output_0.out10_9", "/transformer_blocks.11/norm1_context/norm/LayerNormalization_output_0.out14_44_bfp.out15_44", "/Add_10_output_0.out_35_1_14", "/transformer_blocks.11/norm1/norm/LayerNormalization_output_0.out14_45_bfp.out15_45", "/transformer_blocks.11/norm1/Add_2_output_0.out0_0_45_bfp.out1_10", "/transformer_blocks.11/norm1_context/Add_2_output_0.out0_0_44_bfp.out1_12", "/transformer_blocks.11/attn/Concat_output_0.out22_11_bfp.out23_33", "/transformer_blocks.11/attn/Concat_1_output_0.out22_11_bfp.out23_34", "/transformer_blocks.11/attn/Concat_2_output_0.out22_11_bfp.out23_35", "/transformer_blocks.11/attn/Reshape_3_output_0.out22_11_bfp.out27_0_11", "/transformer_blocks.11/attn/to_out.0/Add_output_0.out6_1_22_bfp.out7_22", "/transformer_blocks.11/Add_output_0.out10_10", "/transformer_blocks.11/norm2/LayerNormalization_output_0.out14_46_bfp.out15_46", "/transformer_blocks.11/Add_2_output_0.out0_0_46_bfp.out1_11", "/transformer_blocks.11/ff/net.0/Mul_5_output_0.out17_2_22_bfp.out25_49", "/transformer_blocks.11/ff/net.2/Add_output_0.out17_3_99_bfp.out25_51", "/transformer_blocks.11/Add_3_output_0.out10_11", "/Add_11_output_0.out_35_1_15", "/transformer_blocks.11/attn/to_add_out/Add_output_0.out6_1_23_bfp.out7_23", "/transformer_blocks.11/Add_4_output_0.out10_12", "/transformer_blocks.11/norm2_context/LayerNormalization_output_0.out14_47_bfp.out15_47", "/transformer_blocks.11/Add_6_output_0.out0_0_47_bfp.out1_13", "/transformer_blocks.11/ff_context/net.0/Mul_5_output_0.out17_2_23_bfp.out25_50", "/transformer_blocks.11/ff_context/net.2/Add_output_0.out17_3_100_bfp.out25_52", "/transformer_blocks.11/Add_7_output_0.out10_13", "/transformer_blocks.12/norm1/norm/LayerNormalization_output_0.out14_49_bfp.out15_49", "/transformer_blocks.12/norm1/Add_2_output_0.out0_0_49_bfp.out1_14", "/transformer_blocks.12/norm1_context/norm/LayerNormalization_output_0.out14_48_bfp.out15_48", "/transformer_blocks.12/norm1_context/Add_2_output_0.out0_0_48_bfp.out1_16", "/transformer_blocks.12/attn/Concat_output_0.out22_12_bfp.out23_36", "/transformer_blocks.12/attn/Concat_1_output_0.out22_12_bfp.out23_37", "/transformer_blocks.12/attn/Concat_2_output_0.out22_12_bfp.out23_38", "/transformer_blocks.12/attn/Reshape_3_output_0.out22_12_bfp.out27_0_12", "/transformer_blocks.12/attn/to_out.0/Add_output_0.out6_1_24_bfp.out7_24", "/transformer_blocks.12/Add_output_0.out10_14", "/transformer_blocks.12/norm2/LayerNormalization_output_0.out14_50_bfp.out15_50", "/transformer_blocks.12/Add_2_output_0.out0_0_50_bfp.out1_15", "/transformer_blocks.12/ff/net.0/Mul_5_output_0.out17_2_24_bfp.out25_53", "/transformer_blocks.12/ff/net.2/Add_output_0.out17_3_107_bfp.out25_55", "/transformer_blocks.12/Add_3_output_0.out10_15", "/transformer_blocks.12/attn/to_add_out/Add_output_0.out6_1_25_bfp.out7_25", "/transformer_blocks.12/Add_4_output_0.out10_16", "/transformer_blocks.12/norm2_context/LayerNormalization_output_0.out14_51_bfp.out15_51", "/transformer_blocks.12/Add_6_output_0.out0_0_51_bfp.out1_17", "/transformer_blocks.12/ff_context/net.0/Mul_5_output_0.out17_2_25_bfp.out25_54", "/transformer_blocks.12/ff_context/net.2/Add_output_0.out17_3_108_bfp.out25_56", "/transformer_blocks.12/Add_7_output_0.out10_17", "/transformer_blocks.13/norm1_context/norm/LayerNormalization_output_0.out14_52_bfp.out15_52", "/Add_12_output_0.out_35_1_16", "/transformer_blocks.13/norm1/norm/LayerNormalization_output_0.out14_53_bfp.out15_53", "/transformer_blocks.13/norm1/Add_2_output_0.out0_0_53_bfp.out1_18", "/transformer_blocks.13/norm1_context/Add_2_output_0.out0_0_52_bfp.out1_20", "/transformer_blocks.13/attn/Concat_output_0.out22_13_bfp.out23_39", "/transformer_blocks.13/attn/Concat_1_output_0.out22_13_bfp.out23_40", "/transformer_blocks.13/attn/Concat_2_output_0.out22_13_bfp.out23_41", "/transformer_blocks.13/attn/Reshape_3_output_0.out22_13_bfp.out27_0_13", "/transformer_blocks.13/attn/to_out.0/Add_output_0.out6_1_26_bfp.out7_26", "/transformer_blocks.13/Add_output_0.out10_18", "/transformer_blocks.13/norm2/LayerNormalization_output_0.out14_54_bfp.out15_54", "/transformer_blocks.13/Add_2_output_0.out0_0_54_bfp.out1_19", "/transformer_blocks.13/ff/net.0/Mul_5_output_0.out17_2_26_bfp.out25_57", "/transformer_blocks.13/ff/net.2/Add_output_0.out17_3_115_bfp.out25_59", "/transformer_blocks.13/Add_3_output_0.out10_19", "/Add_13_output_0.out_35_1_17", "/transformer_blocks.13/attn/to_add_out/Add_output_0.out6_1_27_bfp.out7_27", "/transformer_blocks.13/Add_4_output_0.out10_20", "/transformer_blocks.13/norm2_context/LayerNormalization_output_0.out14_55_bfp.out15_55", "/transformer_blocks.13/Add_6_output_0.out0_0_55_bfp.out1_21", "/transformer_blocks.13/ff_context/net.0/Mul_5_output_0.out17_2_27_bfp.out25_58", "/transformer_blocks.13/ff_context/net.2/Add_output_0.out17_3_116_bfp.out25_60", "/transformer_blocks.13/Add_7_output_0.out10_21", "/transformer_blocks.14/norm1/norm/LayerNormalization_output_0.out14_57_bfp.out15_57", "/transformer_blocks.14/norm1/Add_2_output_0.out0_0_57_bfp.out1_22", "/transformer_blocks.14/norm1_context/norm/LayerNormalization_output_0.out14_56_bfp.out15_56", "/transformer_blocks.14/norm1_context/Add_2_output_0.out0_0_56_bfp.out1_24", "/transformer_blocks.14/attn/Concat_output_0.out22_14_bfp.out23_42", "/transformer_blocks.14/attn/Concat_1_output_0.out22_14_bfp.out23_43", "/transformer_blocks.14/attn/Concat_2_output_0.out22_14_bfp.out23_44", "/transformer_blocks.14/attn/Reshape_3_output_0.out22_14_bfp.out27_0_14", "/transformer_blocks.14/attn/to_out.0/Add_output_0.out6_1_28_bfp.out7_28", "/transformer_blocks.14/Add_output_0.out10_22", "/transformer_blocks.14/norm2/LayerNormalization_output_0.out14_58_bfp.out15_58", "/transformer_blocks.14/Add_2_output_0.out0_0_58_bfp.out1_23", "/transformer_blocks.14/ff/net.0/Mul_5_output_0.out17_2_28_bfp.out25_61", "/transformer_blocks.14/ff/net.2/Add_output_0.out17_3_123_bfp.out25_63", "/transformer_blocks.14/Add_3_output_0.out10_23", "/transformer_blocks.14/attn/to_add_out/Add_output_0.out6_1_29_bfp.out7_29", "/transformer_blocks.14/Add_4_output_0.out10_24", "/transformer_blocks.14/norm2_context/LayerNormalization_output_0.out14_59_bfp.out15_59", "/transformer_blocks.14/Add_6_output_0.out0_0_59_bfp.out1_25", "/transformer_blocks.14/ff_context/net.0/Mul_5_output_0.out17_2_29_bfp.out25_62", "/transformer_blocks.14/ff_context/net.2/Add_output_0.out17_3_124_bfp.out25_64", "/transformer_blocks.14/Add_7_output_0.out10_25", "/transformer_blocks.15/norm1_context/norm/LayerNormalization_output_0.out14_60_bfp.out15_60", "/Add_14_output_0.out_35_1_18", "/transformer_blocks.15/norm1/norm/LayerNormalization_output_0.out14_61_bfp.out15_61", "/transformer_blocks.15/norm1/Add_2_output_0.out0_0_61_bfp.out1_26", "/transformer_blocks.15/norm1_context/Add_2_output_0.out0_0_60_bfp.out1_28", "/transformer_blocks.15/attn/Concat_output_0.out22_15_bfp.out23_45", "/transformer_blocks.15/attn/Concat_1_output_0.out22_15_bfp.out23_46", "/transformer_blocks.15/attn/Concat_2_output_0.out22_15_bfp.out23_47", "/transformer_blocks.15/attn/Reshape_3_output_0.out22_15_bfp.out27_0_15", "/transformer_blocks.15/attn/to_out.0/Add_output_0.out6_1_30_bfp.out7_30", "/transformer_blocks.15/Add_output_0.out10_26", "/transformer_blocks.15/norm2/LayerNormalization_output_0.out14_62_bfp.out15_62", "/transformer_blocks.15/Add_2_output_0.out0_0_62_bfp.out1_27", "/transformer_blocks.15/ff/net.0/Mul_5_output_0.out17_2_30_bfp.out25_65", "/transformer_blocks.15/ff/net.2/Add_output_0.out17_3_131_bfp.out25_67", "/transformer_blocks.15/Add_3_output_0.out10_27", "/Add_15_output_0.out_35_1_19", "/transformer_blocks.15/attn/to_add_out/Add_output_0.out6_1_31_bfp.out7_31", "/transformer_blocks.15/Add_4_output_0.out10_28", "/transformer_blocks.15/norm2_context/LayerNormalization_output_0.out14_63_bfp.out15_63", "/transformer_blocks.15/Add_6_output_0.out0_0_63_bfp.out1_29", "/transformer_blocks.15/ff_context/net.0/Mul_5_output_0.out17_2_31_bfp.out25_66", "/transformer_blocks.15/ff_context/net.2/Add_output_0.out17_3_132_bfp.out25_68", "/transformer_blocks.15/Add_7_output_0.out10_29", "/transformer_blocks.16/norm1/norm/LayerNormalization_output_0.out14_65_bfp.out15_65", "/transformer_blocks.16/norm1/Add_2_output_0.out0_0_65_bfp.out1_30", "/transformer_blocks.16/norm1_context/norm/LayerNormalization_output_0.out14_64_bfp.out15_64", "/transformer_blocks.16/norm1_context/Add_2_output_0.out0_0_64_bfp.out1_32", "/transformer_blocks.16/attn/Concat_output_0.out22_16_bfp.out23_48", "/transformer_blocks.16/attn/Concat_1_output_0.out22_16_bfp.out23_49", "/transformer_blocks.16/attn/Concat_2_output_0.out22_16_bfp.out23_50", "/transformer_blocks.16/attn/Reshape_3_output_0.out22_16_bfp.out27_0_16", "/transformer_blocks.16/attn/to_out.0/Add_output_0.out6_1_32_bfp.out7_32", "/transformer_blocks.16/Add_output_0.out10_30", "/transformer_blocks.16/norm2/LayerNormalization_output_0.out14_66_bfp.out15_66", "/transformer_blocks.16/Add_2_output_0.out0_0_66_bfp.out1_31", "/transformer_blocks.16/ff/net.0/Mul_5_output_0.out17_2_32_bfp.out25_69", "/transformer_blocks.16/ff/net.2/Add_output_0.out17_3_139_bfp.out25_71", "/transformer_blocks.16/Add_3_output_0.out10_31", "/transformer_blocks.16/attn/to_add_out/Add_output_0.out6_1_33_bfp.out7_33", "/transformer_blocks.16/Add_4_output_0.out10_32", "/transformer_blocks.16/norm2_context/LayerNormalization_output_0.out14_67_bfp.out15_67", "/transformer_blocks.16/Add_6_output_0.out0_0_67_bfp.out1_33", "/transformer_blocks.16/ff_context/net.0/Mul_5_output_0.out17_2_33_bfp.out25_70", "/transformer_blocks.16/ff_context/net.2/Add_output_0.out17_3_140_bfp.out25_72", "/transformer_blocks.16/Add_7_output_0.out10_33", "/transformer_blocks.17/norm1_context/norm/LayerNormalization_output_0.out14_68_bfp.out15_68", "/Add_16_output_0.out_35_1_20", "/transformer_blocks.17/norm1/norm/LayerNormalization_output_0.out14_69_bfp.out15_69", "/transformer_blocks.17/norm1/Add_2_output_0.out0_0_69_bfp.out1_34", "/transformer_blocks.17/norm1_context/Add_2_output_0.out0_0_68_bfp.out1_36", "/transformer_blocks.17/attn/Concat_output_0.out22_17_bfp.out23_51", "/transformer_blocks.17/attn/Concat_1_output_0.out22_17_bfp.out23_52", "/transformer_blocks.17/attn/Concat_2_output_0.out22_17_bfp.out23_53", "/transformer_blocks.17/attn/Reshape_3_output_0.out22_17_bfp.out27_0_17", "/transformer_blocks.17/attn/to_out.0/Add_output_0.out6_1_34_bfp.out7_34", "/transformer_blocks.17/Add_output_0.out10_34", "/transformer_blocks.17/norm2/LayerNormalization_output_0.out14_70_bfp.out15_70", "/transformer_blocks.17/Add_2_output_0.out0_0_70_bfp.out1_35", "/transformer_blocks.17/ff/net.0/Mul_5_output_0.out17_2_34_bfp.out25_73", "/transformer_blocks.17/ff/net.2/Add_output_0.out17_3_147_bfp.out25_75", "/transformer_blocks.17/Add_3_output_0.out10_35", "/Add_17_output_0.out_35_1_21", "/transformer_blocks.17/attn/to_add_out/Add_output_0.out6_1_35_bfp.out7_35", "/transformer_blocks.17/Add_4_output_0.out10_36", "/transformer_blocks.17/norm2_context/LayerNormalization_output_0.out14_71_bfp.out15_71", "/transformer_blocks.17/Add_6_output_0.out0_0_71_bfp.out1_37", "/transformer_blocks.17/ff_context/net.0/Mul_5_output_0.out17_2_35_bfp.out25_74", "/transformer_blocks.17/ff_context/net.2/Add_output_0.out17_3_148_bfp.out25_76", "/transformer_blocks.17/Add_7_output_0.out10_37", "/transformer_blocks.18/norm1/norm/LayerNormalization_output_0.out14_73_bfp.out15_73", "/transformer_blocks.18/norm1/Add_2_output_0.out0_0_73_bfp.out1_38", "/transformer_blocks.18/norm1_context/norm/LayerNormalization_output_0.out14_72_bfp.out15_72", "/transformer_blocks.18/norm1_context/Add_2_output_0.out0_0_72_bfp.out1_40", "/transformer_blocks.18/attn/Concat_output_0.out22_18_bfp.out23_54", "/transformer_blocks.18/attn/Concat_1_output_0.out22_18_bfp.out23_55", "/transformer_blocks.18/attn/Concat_2_output_0.out22_18_bfp.out23_56", "/transformer_blocks.18/attn/Reshape_3_output_0.out22_18_bfp.out27_0_18", "/transformer_blocks.18/attn/to_out.0/Add_output_0.out6_1_36_bfp.out7_36", "/transformer_blocks.18/Add_output_0.out10_38", "/transformer_blocks.18/norm2/LayerNormalization_output_0.out14_74_bfp.out15_74", "/transformer_blocks.18/Add_2_output_0.out0_0_74_bfp.out1_39", "/transformer_blocks.18/ff/net.0/Mul_5_output_0.out17_2_36_bfp.out25_77", "/transformer_blocks.18/ff/net.2/Add_output_0.out17_3_155_bfp.out25_79", "/transformer_blocks.18/Add_3_output_0.out10_39", "/transformer_blocks.18/attn/to_add_out/Add_output_0.out6_1_37_bfp.out7_37", "/transformer_blocks.18/Add_4_output_0.out10_40", "/transformer_blocks.18/norm2_context/LayerNormalization_output_0.out14_75_bfp.out15_75", "/transformer_blocks.18/Add_6_output_0.out0_0_75_bfp.out1_41", "/transformer_blocks.18/ff_context/net.0/Mul_5_output_0.out17_2_37_bfp.out25_78", "/transformer_blocks.18/ff_context/net.2/Add_output_0.out17_3_156_bfp.out25_80", "/transformer_blocks.18/Add_7_output_0.out10_41", "/transformer_blocks.19/norm1_context/norm/LayerNormalization_output_0.out14_76_bfp.out15_76", "/Add_18_output_0.out_35_1_22", "/transformer_blocks.19/norm1/norm/LayerNormalization_output_0.out14_77_bfp.out15_77", "/transformer_blocks.19/norm1/Add_2_output_0.out0_0_77_bfp.out1_42", "/transformer_blocks.19/norm1_context/Add_2_output_0.out0_0_76_bfp.out1_44", "/transformer_blocks.19/attn/Concat_output_0.out22_19_bfp.out23_57", "/transformer_blocks.19/attn/Concat_1_output_0.out22_19_bfp.out23_58", "/transformer_blocks.19/attn/Concat_2_output_0.out22_19_bfp.out23_59", "/transformer_blocks.19/attn/Reshape_3_output_0.out22_19_bfp.out27_0_19", "/transformer_blocks.19/attn/to_out.0/Add_output_0.out6_1_38_bfp.out7_38", "/transformer_blocks.19/Add_output_0.out10_42", "/transformer_blocks.19/norm2/LayerNormalization_output_0.out14_78_bfp.out15_78", "/transformer_blocks.19/Add_2_output_0.out0_0_78_bfp.out1_43", "/transformer_blocks.19/ff/net.0/Mul_5_output_0.out17_2_38_bfp.out25_81", "/transformer_blocks.19/ff/net.2/Add_output_0.out17_3_163_bfp.out25_83", "/transformer_blocks.19/Add_3_output_0.out10_43", "/Add_19_output_0.out_35_1_23", "/transformer_blocks.19/attn/to_add_out/Add_output_0.out6_1_39_bfp.out7_39", "/transformer_blocks.19/Add_4_output_0.out10_44", "/transformer_blocks.19/norm2_context/LayerNormalization_output_0.out14_79_bfp.out15_79", "/transformer_blocks.19/Add_6_output_0.out0_0_79_bfp.out1_45", "/transformer_blocks.19/ff_context/net.0/Mul_5_output_0.out17_2_39_bfp.out25_82", "/transformer_blocks.19/ff_context/net.2/Add_output_0.out17_3_164_bfp.out25_84", "/transformer_blocks.19/Add_7_output_0.out10_45", "/transformer_blocks.20/norm1/norm/LayerNormalization_output_0.out14_81_bfp.out15_81", "/transformer_blocks.20/norm1_context/norm/LayerNormalization_output_0.out14_80_bfp.out15_80", "/transformer_blocks.20/norm1/Add_2_output_0.out0_0_81_bfp.out1_50", "/transformer_blocks.20/norm1_context/Add_2_output_0.out0_0_80_bfp.out1_52", "/transformer_blocks.20/attn/Concat_output_0.out22_20_bfp.out23_60", "/transformer_blocks.20/attn/Concat_1_output_0.out22_20_bfp.out23_61", "/transformer_blocks.20/attn/Concat_2_output_0.out22_20_bfp.out23_62", "/transformer_blocks.20/attn/Reshape_3_output_0.out22_20_bfp.out27_0_20", "/transformer_blocks.20/attn/to_add_out/Add_output_0.out6_1_41_bfp.out7_41", "/transformer_blocks.20/attn/to_out.0/Add_output_0.out6_1_40_bfp.out7_40", "/transformer_blocks.20/Add_output_0.out10_50", "/transformer_blocks.20/norm2/LayerNormalization_output_0.out14_82_bfp.out15_82", "/transformer_blocks.20/Add_2_output_0.out0_0_82_bfp.out1_51", "/transformer_blocks.20/ff/net.0/Mul_5_output_0.out17_2_40_bfp.out25_85", "/transformer_blocks.20/ff/net.2/Add_output_0.out17_3_171_bfp.out25_87", "/transformer_blocks.20/Add_3_output_0.out10_51", "/transformer_blocks.20/Add_4_output_0.out10_52", "/transformer_blocks.20/norm2_context/LayerNormalization_output_0.out14_83_bfp.out15_83", "/transformer_blocks.20/Add_6_output_0.out0_0_83_bfp.out1_53", "/transformer_blocks.20/ff_context/net.0/Mul_5_output_0.out17_2_41_bfp.out25_86", "/transformer_blocks.20/ff_context/net.2/Add_output_0.out17_3_172_bfp.out25_88", "/transformer_blocks.20/Add_7_output_0.out10_53", "/transformer_blocks.21/norm1_context/norm/LayerNormalization_output_0.out14_84_bfp.out15_84", "/Add_20_output_0.out_35_1_24", "/transformer_blocks.21/norm1/norm/LayerNormalization_output_0.out14_85_bfp.out15_85", "/transformer_blocks.21/norm1/Add_2_output_0.out0_0_85_bfp.out1_54", "/transformer_blocks.21/norm1_context/Add_2_output_0.out0_0_84_bfp.out1_56", "/transformer_blocks.21/attn/Concat_output_0.out22_21_bfp.out23_63", "/transformer_blocks.21/attn/Concat_1_output_0.out22_21_bfp.out23_64", "/transformer_blocks.21/attn/Concat_2_output_0.out22_21_bfp.out23_65", "/transformer_blocks.21/attn/Reshape_3_output_0.out22_21_bfp.out27_0_21", "/transformer_blocks.21/attn/to_out.0/Add_output_0.out6_1_42_bfp.out7_42", "/transformer_blocks.21/Add_output_0.out10_54", "/transformer_blocks.21/norm2/LayerNormalization_output_0.out14_86_bfp.out15_86", "/transformer_blocks.21/Add_2_output_0.out0_0_86_bfp.out1_55", "/transformer_blocks.21/ff/net.0/Mul_5_output_0.out17_2_42_bfp.out25_89", "/transformer_blocks.21/ff/net.2/Add_output_0.out17_3_179_bfp.out25_91", "/transformer_blocks.21/Add_3_output_0.out10_55", "/Add_21_output_0.out_35_1_25", "/transformer_blocks.21/attn/to_add_out/Add_output_0.out6_1_43_bfp.out7_43", "/transformer_blocks.21/Add_4_output_0.out10_56", "/transformer_blocks.21/norm2_context/LayerNormalization_output_0.out14_87_bfp.out15_87", "/transformer_blocks.21/Add_6_output_0.out0_0_87_bfp.out1_57", "/transformer_blocks.21/ff_context/net.0/Mul_5_output_0.out17_2_43_bfp.out25_90", "/transformer_blocks.21/ff_context/net.2/Add_output_0.out17_3_180_bfp.out25_92", "/transformer_blocks.21/Add_7_output_0.out10_57", "/transformer_blocks.22/norm1/norm/LayerNormalization_output_0.out14_89_bfp.out15_89", "/transformer_blocks.22/norm1/Add_2_output_0.out0_0_89_bfp.out1_58", "/transformer_blocks.22/norm1_context/norm/LayerNormalization_output_0.out14_88_bfp.out15_88", "/transformer_blocks.22/norm1_context/Add_2_output_0.out0_0_88_bfp.out1_60", "/transformer_blocks.22/attn/Concat_output_0.out22_22_bfp.out23_66", "/transformer_blocks.22/attn/Concat_1_output_0.out22_22_bfp.out23_67", "/transformer_blocks.22/attn/Concat_2_output_0.out22_22_bfp.out23_68", "/transformer_blocks.22/attn/Reshape_3_output_0.out22_22_bfp.out27_0_22", "/transformer_blocks.22/attn/to_out.0/Add_output_0.out6_1_44_bfp.out7_44", "/transformer_blocks.22/Add_output_0.out10_58", "/transformer_blocks.22/norm2/LayerNormalization_output_0.out14_90_bfp.out15_90", "/transformer_blocks.22/Add_2_output_0.out0_0_90_bfp.out1_59", "/transformer_blocks.22/ff/net.0/Mul_5_output_0.out17_2_44_bfp.out25_93", "/transformer_blocks.22/ff/net.2/Add_output_0.out17_3_187_bfp.out25_95", "/transformer_blocks.22/Add_3_output_0.out10_59", "/transformer_blocks.22/attn/to_add_out/Add_output_0.out6_1_45_bfp.out7_45", "/transformer_blocks.22/Add_4_output_0.out10_60", "/transformer_blocks.22/norm2_context/LayerNormalization_output_0.out14_91_bfp.out15_91", "/transformer_blocks.22/Add_6_output_0.out0_0_91_bfp.out1_61", "/transformer_blocks.22/ff_context/net.0/Mul_5_output_0.out17_2_45_bfp.out25_94", "/transformer_blocks.22/ff_context/net.2/Add_output_0.out17_3_188_bfp.out25_96", "/transformer_blocks.22/Add_7_output_0.out10_61", "/transformer_blocks.23/norm1_context/norm/LayerNormalization_output_0.out14_92_bfp.out15_92", "/Add_22_output_0.out_35_1_26", "/transformer_blocks.23/norm1/norm/LayerNormalization_output_0.out14_93_bfp.out15_93", "/transformer_blocks.23/norm1/Add_2_output_0.out0_0_93_bfp.out1_62", "/transformer_blocks.23/norm1_context/Add_2_output_0.out0_0_92_bfp.out1_64", "/transformer_blocks.23/attn/Concat_output_0.out22_23_bfp.out23_69", "/transformer_blocks.23/attn/Concat_1_output_0.out22_23_bfp.out23_70", "/transformer_blocks.23/attn/Concat_2_output_0.out22_23_bfp.out23_71", "/transformer_blocks.23/attn/Reshape_3_output_0.out22_23_bfp.out27_0_23", "/transformer_blocks.23/attn/to_out.0/Add_output_0.out6_1_46_bfp.out7_46", "/transformer_blocks.23/Add_output_0.out10_62", "/transformer_blocks.23/norm2/LayerNormalization_output_0.out14_94_bfp.out15_94", "/transformer_blocks.23/Add_2_output_0.out0_0_94_bfp.out1_63", "/transformer_blocks.23/ff/net.0/Mul_5_output_0.out17_2_46_bfp.out25_97", "/transformer_blocks.23/ff/net.2/Add_output_0.out17_3_195_bfp.out25_98", "/transformer_blocks.23/Add_3_output_0.out10_63", "/norm_out/norm/LayerNormalization_output_0.out14_95_bfp.out15_95", "/norm_out/Add_2_output_0.out0_0_95_bfp.out1_93", "/norm_out/Add_2_output_0.out0_0_95" ] }, "const": { "buffer_size": 2313015936, "xrt_arg_id": 3, "packed_tensors": [ "existing_model.pos_embed.proj.weight", "existing_model.time_text_embed.timestep_embedder.linear_1.weight_5_1_0", "/time_text_embed/timestep_embedder/act/Sigmoid.weights2_1", "existing_model.time_text_embed.timestep_embedder.linear_2.weight_5_1_1", "existing_model.time_text_embed.text_embedder.linear_1.weight_5_1_2", "/time_text_embed/text_embedder/act_1/Sigmoid.weights2_0", "existing_model.time_text_embed.text_embedder.linear_2.weight_5_1_3", "/transformer_blocks.0/norm1/silu/Sigmoid.weights2_2", "encoder_hidden_states.out17_3_0_bfp.wts", "onnx::MatMul_7905", "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_bfp.wts", "/transformer_blocks.0/norm1_context/norm/Constant_output_0", "/transformer_blocks.0/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_0_existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_1", "/transformer_blocks.0/norm1/norm/Constant_output_0", "/transformer_blocks.0/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_0_existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_1", "onnx::MatMul_7909_onnx::MatMul_7906", "onnx::MatMul_7910_onnx::MatMul_7907", "onnx::MatMul_7911_onnx::MatMul_7908", "onnx::MatMul_7926", "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_2_gma", "/transformer_blocks.0/norm2_context/Constant_output_0", "/transformer_blocks.0/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_3_existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_4", "onnx::MatMul_7929", "onnx::MatMul_7930", "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_5_gma", "onnx::MatMul_7925", "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_2_gma", "/transformer_blocks.0/norm2/Constant_output_0", "/transformer_blocks.0/norm2/Constant_1_output_0", "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_3_existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_4", "onnx::MatMul_7927", "onnx::MatMul_7928", "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_5_gma", "/transformer_blocks.1/norm1_context/norm/Constant_output_0", "/transformer_blocks.1/norm1_context/norm/Constant_1_output_0", "/transformer_blocks.1/norm1/norm/Constant_output_0", "/transformer_blocks.1/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_0_existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_1", "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_0_existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_1", "onnx::MatMul_7934_onnx::MatMul_7931", "onnx::MatMul_7935_onnx::MatMul_7932", "onnx::MatMul_7936_onnx::MatMul_7933", "onnx::MatMul_7950", "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_2_gma", "/transformer_blocks.1/norm2/Constant_output_0", "/transformer_blocks.1/norm2/Constant_1_output_0", "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_3_existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_4", "onnx::MatMul_7952", "onnx::MatMul_7953", "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_5_gma", "onnx::MatMul_7951", "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_2_gma", "/transformer_blocks.1/norm2_context/Constant_output_0", "/transformer_blocks.1/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_3_existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_4", "onnx::MatMul_7954", "onnx::MatMul_7955", "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_5_gma", "/transformer_blocks.2/norm1/norm/Constant_output_0", "/transformer_blocks.2/norm1/norm/Constant_1_output_0", "/transformer_blocks.2/norm1_context/norm/Constant_output_0", "/transformer_blocks.2/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_0_existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_1", "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_0_existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_1", "onnx::MatMul_7959_onnx::MatMul_7956", "onnx::MatMul_7960_onnx::MatMul_7957", "onnx::MatMul_7961_onnx::MatMul_7958", "onnx::MatMul_7976", "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_2_gma", "onnx::MatMul_7975", "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_2_gma", "/transformer_blocks.2/norm2/Constant_output_0", "/transformer_blocks.2/norm2/Constant_1_output_0", "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_3_existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_4", "onnx::MatMul_7977", "onnx::MatMul_7978", "/transformer_blocks.2/norm2_context/Constant_output_0", "/transformer_blocks.2/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_3_existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_4", "onnx::MatMul_7979", "onnx::MatMul_7980", "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_5_gma", "/transformer_blocks.3/norm1_context/norm/Constant_output_0", "/transformer_blocks.3/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_5_gma", "/transformer_blocks.3/norm1/norm/Constant_output_0", "/transformer_blocks.3/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_0_existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_1", "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_0_existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_1", "onnx::MatMul_7984_onnx::MatMul_7981", "onnx::MatMul_7985_onnx::MatMul_7982", "onnx::MatMul_7986_onnx::MatMul_7983", "onnx::MatMul_8001", "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_2_gma", "onnx::MatMul_8000", "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_2_gma", "/transformer_blocks.3/norm2/Constant_output_0", "/transformer_blocks.3/norm2/Constant_1_output_0", "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_3_existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_4", "onnx::MatMul_8002", "onnx::MatMul_8003", "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_5_gma", "/transformer_blocks.3/norm2_context/Constant_output_0", "/transformer_blocks.3/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_3_existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_4", "onnx::MatMul_8004", "onnx::MatMul_8005", "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_5_gma", "/transformer_blocks.4/norm1/norm/Constant_output_0", "/transformer_blocks.4/norm1/norm/Constant_1_output_0", "/transformer_blocks.4/norm1_context/norm/Constant_output_0", "/transformer_blocks.4/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_0_existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_1", "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_0_existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_1", "onnx::MatMul_8009_onnx::MatMul_8006", "onnx::MatMul_8010_onnx::MatMul_8007", "onnx::MatMul_8011_onnx::MatMul_8008", "onnx::MatMul_8026", "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_2_gma", "onnx::MatMul_8025", "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_2_gma", "/transformer_blocks.4/norm2/Constant_output_0", "/transformer_blocks.4/norm2/Constant_1_output_0", "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_3_existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_4", "onnx::MatMul_8027", "onnx::MatMul_8028", "/transformer_blocks.4/norm2_context/Constant_output_0", "/transformer_blocks.4/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_3_existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_4", "onnx::MatMul_8029", "onnx::MatMul_8030", "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_5_gma", "/transformer_blocks.5/norm1_context/norm/Constant_output_0", "/transformer_blocks.5/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_5_gma", "/transformer_blocks.5/norm1/norm/Constant_output_0", "/transformer_blocks.5/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_0_existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_1", "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_0_existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_1", "onnx::MatMul_8034_onnx::MatMul_8031", "onnx::MatMul_8035_onnx::MatMul_8032", "onnx::MatMul_8036_onnx::MatMul_8033", "onnx::MatMul_8051", "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_2_gma", "onnx::MatMul_8050", "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_2_gma", "/transformer_blocks.5/norm2/Constant_output_0", "/transformer_blocks.5/norm2/Constant_1_output_0", "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_3_existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_4", "onnx::MatMul_8052", "onnx::MatMul_8053", "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_5_gma", "/transformer_blocks.5/norm2_context/Constant_output_0", "/transformer_blocks.5/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_3_existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_4", "onnx::MatMul_8054", "onnx::MatMul_8055", "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_5_gma", "/transformer_blocks.6/norm1/norm/Constant_output_0", "/transformer_blocks.6/norm1/norm/Constant_1_output_0", "/transformer_blocks.6/norm1_context/norm/Constant_output_0", "/transformer_blocks.6/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_0_existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_1", "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_0_existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_1", "onnx::MatMul_8059_onnx::MatMul_8056", "onnx::MatMul_8060_onnx::MatMul_8057", "onnx::MatMul_8061_onnx::MatMul_8058", "onnx::MatMul_8076", "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_2_gma", "onnx::MatMul_8075", "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_2_gma", "/transformer_blocks.6/norm2/Constant_output_0", "/transformer_blocks.6/norm2/Constant_1_output_0", "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_3_existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_4", "onnx::MatMul_8077", "onnx::MatMul_8078", "/transformer_blocks.6/norm2_context/Constant_output_0", "/transformer_blocks.6/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_3_existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_4", "onnx::MatMul_8079", "onnx::MatMul_8080", "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_5_gma", "/transformer_blocks.7/norm1_context/norm/Constant_output_0", "/transformer_blocks.7/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_5_gma", "/transformer_blocks.7/norm1/norm/Constant_output_0", "/transformer_blocks.7/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_0_existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_1", "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_0_existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_1", "onnx::MatMul_8084_onnx::MatMul_8081", "onnx::MatMul_8085_onnx::MatMul_8082", "onnx::MatMul_8086_onnx::MatMul_8083", "onnx::MatMul_8101", "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_2_gma", "onnx::MatMul_8100", "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_2_gma", "/transformer_blocks.7/norm2/Constant_output_0", "/transformer_blocks.7/norm2/Constant_1_output_0", "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_3_existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_4", "onnx::MatMul_8102", "onnx::MatMul_8103", "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_5_gma", "/transformer_blocks.7/norm2_context/Constant_output_0", "/transformer_blocks.7/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_3_existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_4", "onnx::MatMul_8104", "onnx::MatMul_8105", "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_5_gma", "/transformer_blocks.8/norm1/norm/Constant_output_0", "/transformer_blocks.8/norm1/norm/Constant_1_output_0", "/transformer_blocks.8/norm1_context/norm/Constant_output_0", "/transformer_blocks.8/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_0_existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_1", "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_0_existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_1", "onnx::MatMul_8109_onnx::MatMul_8106", "onnx::MatMul_8110_onnx::MatMul_8107", "onnx::MatMul_8111_onnx::MatMul_8108", "onnx::MatMul_8126", "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_2_gma", "onnx::MatMul_8125", "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_2_gma", "/transformer_blocks.8/norm2/Constant_output_0", "/transformer_blocks.8/norm2/Constant_1_output_0", "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_3_existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_4", "onnx::MatMul_8127", "onnx::MatMul_8128", "/transformer_blocks.8/norm2_context/Constant_output_0", "/transformer_blocks.8/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_3_existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_4", "onnx::MatMul_8129", "onnx::MatMul_8130", "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_5_gma", "/transformer_blocks.9/norm1_context/norm/Constant_output_0", "/transformer_blocks.9/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_5_gma", "/transformer_blocks.9/norm1/norm/Constant_output_0", "/transformer_blocks.9/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_0_existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_1", "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_0_existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_1", "onnx::MatMul_8134_onnx::MatMul_8131", "onnx::MatMul_8135_onnx::MatMul_8132", "onnx::MatMul_8136_onnx::MatMul_8133", "onnx::MatMul_8151", "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_2_gma", "onnx::MatMul_8150", "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_2_gma", "/transformer_blocks.9/norm2/Constant_output_0", "/transformer_blocks.9/norm2/Constant_1_output_0", "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_3_existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_4", "onnx::MatMul_8152", "onnx::MatMul_8153", "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_5_gma", "/transformer_blocks.10/norm1/norm/Constant_output_0", "/transformer_blocks.10/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_0_existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_1", "/transformer_blocks.9/norm2_context/Constant_output_0", "/transformer_blocks.9/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_3_existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_4", "onnx::MatMul_8154", "onnx::MatMul_8155", "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_5_gma", "/transformer_blocks.10/norm1_context/norm/Constant_output_0", "/transformer_blocks.10/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_0_existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_1", "onnx::MatMul_8159_onnx::MatMul_8156", "onnx::MatMul_8160_onnx::MatMul_8157", "onnx::MatMul_8161_onnx::MatMul_8158", "onnx::MatMul_8175", "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_2_gma", "/transformer_blocks.10/norm2/Constant_output_0", "/transformer_blocks.10/norm2/Constant_1_output_0", "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_3_existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_4", "onnx::MatMul_8177", "onnx::MatMul_8178", "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_5_gma", "onnx::MatMul_8176", "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_2_gma", "/transformer_blocks.10/norm2_context/Constant_output_0", "/transformer_blocks.10/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_3_existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_4", "onnx::MatMul_8179", "onnx::MatMul_8180", "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_5_gma", "/transformer_blocks.11/norm1_context/norm/Constant_output_0", "/transformer_blocks.11/norm1_context/norm/Constant_1_output_0", "/transformer_blocks.11/norm1/norm/Constant_output_0", "/transformer_blocks.11/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_0_existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_1", "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_0_existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_1", "onnx::MatMul_8184_onnx::MatMul_8181", "onnx::MatMul_8185_onnx::MatMul_8182", "onnx::MatMul_8186_onnx::MatMul_8183", "onnx::MatMul_8200", "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_2_gma", "/transformer_blocks.11/norm2/Constant_output_0", "/transformer_blocks.11/norm2/Constant_1_output_0", "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_3_existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_4", "onnx::MatMul_8202", "onnx::MatMul_8203", "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_5_gma", "onnx::MatMul_8201", "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_2_gma", "/transformer_blocks.11/norm2_context/Constant_output_0", "/transformer_blocks.11/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_3_existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_4", "onnx::MatMul_8204", "onnx::MatMul_8205", "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_5_gma", "/transformer_blocks.12/norm1/norm/Constant_output_0", "/transformer_blocks.12/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_0_existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_1", "/transformer_blocks.12/norm1_context/norm/Constant_output_0", "/transformer_blocks.12/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_0_existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_1", "onnx::MatMul_8209_onnx::MatMul_8206", "onnx::MatMul_8210_onnx::MatMul_8207", "onnx::MatMul_8211_onnx::MatMul_8208", "onnx::MatMul_8225", "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_2_gma", "/transformer_blocks.12/norm2/Constant_output_0", "/transformer_blocks.12/norm2/Constant_1_output_0", "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_3_existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_4", "onnx::MatMul_8227", "onnx::MatMul_8228", "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_5_gma", "onnx::MatMul_8226", "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_2_gma", "/transformer_blocks.12/norm2_context/Constant_output_0", "/transformer_blocks.12/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_3_existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_4", "onnx::MatMul_8229", "onnx::MatMul_8230", "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_5_gma", "/transformer_blocks.13/norm1_context/norm/Constant_output_0", "/transformer_blocks.13/norm1_context/norm/Constant_1_output_0", "/transformer_blocks.13/norm1/norm/Constant_output_0", "/transformer_blocks.13/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_0_existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_1", "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_0_existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_1", "onnx::MatMul_8234_onnx::MatMul_8231", "onnx::MatMul_8235_onnx::MatMul_8232", "onnx::MatMul_8236_onnx::MatMul_8233", "onnx::MatMul_8250", "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_2_gma", "/transformer_blocks.13/norm2/Constant_output_0", "/transformer_blocks.13/norm2/Constant_1_output_0", "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_3_existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_4", "onnx::MatMul_8252", "onnx::MatMul_8253", "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_5_gma", "onnx::MatMul_8251", "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_2_gma", "/transformer_blocks.13/norm2_context/Constant_output_0", "/transformer_blocks.13/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_3_existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_4", "onnx::MatMul_8254", "onnx::MatMul_8255", "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_5_gma", "/transformer_blocks.14/norm1/norm/Constant_output_0", "/transformer_blocks.14/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_0_existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_1", "/transformer_blocks.14/norm1_context/norm/Constant_output_0", "/transformer_blocks.14/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_0_existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_1", "onnx::MatMul_8259_onnx::MatMul_8256", "onnx::MatMul_8260_onnx::MatMul_8257", "onnx::MatMul_8261_onnx::MatMul_8258", "onnx::MatMul_8275", "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_2_gma", "/transformer_blocks.14/norm2/Constant_output_0", "/transformer_blocks.14/norm2/Constant_1_output_0", "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_3_existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_4", "onnx::MatMul_8277", "onnx::MatMul_8278", "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_5_gma", "onnx::MatMul_8276", "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_2_gma", "/transformer_blocks.14/norm2_context/Constant_output_0", "/transformer_blocks.14/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_3_existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_4", "onnx::MatMul_8279", "onnx::MatMul_8280", "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_5_gma", "/transformer_blocks.15/norm1_context/norm/Constant_output_0", "/transformer_blocks.15/norm1_context/norm/Constant_1_output_0", "/transformer_blocks.15/norm1/norm/Constant_output_0", "/transformer_blocks.15/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_0_existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_1", "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_0_existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_1", "onnx::MatMul_8284_onnx::MatMul_8281", "onnx::MatMul_8285_onnx::MatMul_8282", "onnx::MatMul_8286_onnx::MatMul_8283", "onnx::MatMul_8300", "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_2_gma", "/transformer_blocks.15/norm2/Constant_output_0", "/transformer_blocks.15/norm2/Constant_1_output_0", "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_3_existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_4", "onnx::MatMul_8302", "onnx::MatMul_8303", "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_5_gma", "onnx::MatMul_8301", "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_2_gma", "/transformer_blocks.15/norm2_context/Constant_output_0", "/transformer_blocks.15/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_3_existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_4", "onnx::MatMul_8304", "onnx::MatMul_8305", "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_5_gma", "/transformer_blocks.16/norm1/norm/Constant_output_0", "/transformer_blocks.16/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_0_existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_1", "/transformer_blocks.16/norm1_context/norm/Constant_output_0", "/transformer_blocks.16/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_0_existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_1", "onnx::MatMul_8309_onnx::MatMul_8306", "onnx::MatMul_8310_onnx::MatMul_8307", "onnx::MatMul_8311_onnx::MatMul_8308", "onnx::MatMul_8325", "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_2_gma", "/transformer_blocks.16/norm2/Constant_output_0", "/transformer_blocks.16/norm2/Constant_1_output_0", "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_3_existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_4", "onnx::MatMul_8327", "onnx::MatMul_8328", "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_5_gma", "onnx::MatMul_8326", "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_2_gma", "/transformer_blocks.16/norm2_context/Constant_output_0", "/transformer_blocks.16/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_3_existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_4", "onnx::MatMul_8329", "onnx::MatMul_8330", "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_5_gma", "/transformer_blocks.17/norm1_context/norm/Constant_output_0", "/transformer_blocks.17/norm1_context/norm/Constant_1_output_0", "/transformer_blocks.17/norm1/norm/Constant_output_0", "/transformer_blocks.17/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_0_existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_1", "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_0_existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_1", "onnx::MatMul_8334_onnx::MatMul_8331", "onnx::MatMul_8335_onnx::MatMul_8332", "onnx::MatMul_8336_onnx::MatMul_8333", "onnx::MatMul_8350", "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_2_gma", "/transformer_blocks.17/norm2/Constant_output_0", "/transformer_blocks.17/norm2/Constant_1_output_0", "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_3_existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_4", "onnx::MatMul_8352", "onnx::MatMul_8353", "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_5_gma", "onnx::MatMul_8351", "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_2_gma", "/transformer_blocks.17/norm2_context/Constant_output_0", "/transformer_blocks.17/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_3_existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_4", "onnx::MatMul_8354", "onnx::MatMul_8355", "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_5_gma", "/transformer_blocks.18/norm1/norm/Constant_output_0", "/transformer_blocks.18/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_0_existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_1", "/transformer_blocks.18/norm1_context/norm/Constant_output_0", "/transformer_blocks.18/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_0_existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_1", "onnx::MatMul_8359_onnx::MatMul_8356", "onnx::MatMul_8360_onnx::MatMul_8357", "onnx::MatMul_8361_onnx::MatMul_8358", "onnx::MatMul_8375", "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_2_gma", "/transformer_blocks.18/norm2/Constant_output_0", "/transformer_blocks.18/norm2/Constant_1_output_0", "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_3_existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_4", "onnx::MatMul_8377", "onnx::MatMul_8378", "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_5_gma", "onnx::MatMul_8376", "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_2_gma", "/transformer_blocks.18/norm2_context/Constant_output_0", "/transformer_blocks.18/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_3_existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_4", "onnx::MatMul_8379", "onnx::MatMul_8380", "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_5_gma", "/transformer_blocks.19/norm1_context/norm/Constant_output_0", "/transformer_blocks.19/norm1_context/norm/Constant_1_output_0", "/transformer_blocks.19/norm1/norm/Constant_output_0", "/transformer_blocks.19/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_0_existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_1", "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_0_existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_1", "onnx::MatMul_8384_onnx::MatMul_8381", "onnx::MatMul_8385_onnx::MatMul_8382", "onnx::MatMul_8386_onnx::MatMul_8383", "onnx::MatMul_8400", "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_2_gma", "/transformer_blocks.19/norm2/Constant_output_0", "/transformer_blocks.19/norm2/Constant_1_output_0", "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_3_existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_4", "onnx::MatMul_8402", "onnx::MatMul_8403", "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_5_gma", "onnx::MatMul_8401", "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_2_gma", "/transformer_blocks.19/norm2_context/Constant_output_0", "/transformer_blocks.19/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_3_existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_4", "onnx::MatMul_8404", "onnx::MatMul_8405", "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_5_gma", "/transformer_blocks.20/norm1/norm/Constant_output_0", "/transformer_blocks.20/norm1/norm/Constant_1_output_0", "/transformer_blocks.20/norm1_context/norm/Constant_output_0", "/transformer_blocks.20/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_0_existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_1", "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_0_existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_1", "onnx::MatMul_8409_onnx::MatMul_8406", "onnx::MatMul_8410_onnx::MatMul_8407", "onnx::MatMul_8411_onnx::MatMul_8408", "onnx::MatMul_8426", "onnx::MatMul_8425", "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_2_gma", "/transformer_blocks.20/norm2/Constant_output_0", "/transformer_blocks.20/norm2/Constant_1_output_0", "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_3_existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_4", "onnx::MatMul_8427", "onnx::MatMul_8428", "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_5_gma", "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_2_gma", "/transformer_blocks.20/norm2_context/Constant_output_0", "/transformer_blocks.20/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_3_existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_4", "onnx::MatMul_8429", "onnx::MatMul_8430", "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_5_gma", "/transformer_blocks.21/norm1_context/norm/Constant_output_0", "/transformer_blocks.21/norm1_context/norm/Constant_1_output_0", "/transformer_blocks.21/norm1/norm/Constant_output_0", "/transformer_blocks.21/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_0_existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_1", "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_0_existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_1", "onnx::MatMul_8434_onnx::MatMul_8431", "onnx::MatMul_8435_onnx::MatMul_8432", "onnx::MatMul_8436_onnx::MatMul_8433", "onnx::MatMul_8450", "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_2_gma", "/transformer_blocks.21/norm2/Constant_output_0", "/transformer_blocks.21/norm2/Constant_1_output_0", "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_3_existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_4", "onnx::MatMul_8452", "onnx::MatMul_8453", "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_5_gma", "onnx::MatMul_8451", "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_2_gma", "/transformer_blocks.21/norm2_context/Constant_output_0", "/transformer_blocks.21/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_3_existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_4", "onnx::MatMul_8454", "onnx::MatMul_8455", "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_5_gma", "/transformer_blocks.22/norm1/norm/Constant_output_0", "/transformer_blocks.22/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_0_existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_1", "/transformer_blocks.22/norm1_context/norm/Constant_output_0", "/transformer_blocks.22/norm1_context/norm/Constant_1_output_0", "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_0_existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_1", "onnx::MatMul_8459_onnx::MatMul_8456", "onnx::MatMul_8460_onnx::MatMul_8457", "onnx::MatMul_8461_onnx::MatMul_8458", "onnx::MatMul_8475", "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_2_gma", "/transformer_blocks.22/norm2/Constant_output_0", "/transformer_blocks.22/norm2/Constant_1_output_0", "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_3_existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_4", "onnx::MatMul_8477", "onnx::MatMul_8478", "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_5_gma", "onnx::MatMul_8476", "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_2_gma", "/transformer_blocks.22/norm2_context/Constant_output_0", "/transformer_blocks.22/norm2_context/Constant_1_output_0", "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_3_existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_4", "onnx::MatMul_8479", "onnx::MatMul_8480", "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_5_gma", "/transformer_blocks.23/norm1_context/norm/Constant_output_0", "/transformer_blocks.23/norm1_context/norm/Constant_1_output_0", "/transformer_blocks.23/norm1/norm/Constant_output_0", "/transformer_blocks.23/norm1/norm/Constant_1_output_0", "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_0_existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_1", "existing_model.transformer_blocks.23.norm1_context.linear.weight_5_1_51_27_47_0_existing_model.transformer_blocks.23.norm1_context.linear.weight_5_1_51_27_47_1", "onnx::MatMul_8484_onnx::MatMul_8481", "onnx::MatMul_8485_onnx::MatMul_8482", "onnx::MatMul_8486_onnx::MatMul_8483", "onnx::MatMul_8497", "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_2_gma", "/transformer_blocks.23/norm2/Constant_output_0", "/transformer_blocks.23/norm2/Constant_1_output_0", "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_3_existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_4", "onnx::MatMul_8498", "onnx::MatMul_8499", "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_5_gma", "/norm_out/norm/Constant_output_0", "/norm_out/norm/Constant_1_output_0", "existing_model.norm_out.linear.weight_5_1_52_27_48_0_existing_model.norm_out.linear.weight_5_1_52_27_48_1", "/norm_out/Add_2_output_0.out0_0_95_bfp.out1_93_bfp.wts", "onnx::MatMul_8500" ] }, "super_instr": { "buffer_size": 0, "xrt_arg_id": 4, "packed_tensors": [] } }, "tensor_map": { "hidden_states_nhwc.out5_0_0": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1, 16 ], "size_in_bytes": 32, "op_tensor_size": 32, "dynamic_shapes": [ "batch_size", "w", "h", "False" ], "offset": 0 }, "/pos_embed/Reshape_1_output_0.out_35_1_2": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "False", "state_dim1", "False" ], "offset": 32 }, "/time_text_embed/Cast_output_0.out17_3_3": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 256 ], "size_in_bytes": 512, "op_tensor_size": 512, "dynamic_shapes": [ "batch_size", "False" ], "offset": 3104 }, "pooled_projections.out17_3_1": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 2048 ], "size_in_bytes": 4096, "op_tensor_size": 4096, "dynamic_shapes": [ "batch_size", "False" ], "offset": 3616 }, "encoder_hidden_states.out17_3_0": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 4096 ], "size_in_bytes": 8192, "op_tensor_size": 8192, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 7712 }, "block_controlnet_hidden_states_0.out_35_1_4": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 18976 }, "block_controlnet_hidden_states_1.out_35_1_6": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 25120 }, "block_controlnet_hidden_states_2.out_35_1_8": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 31264 }, "block_controlnet_hidden_states_3.out_35_1_10": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 37408 }, "block_controlnet_hidden_states_4.out_35_1_12": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 43552 }, "block_controlnet_hidden_states_5.out_35_1_14": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 49696 }, "block_controlnet_hidden_states_6.out_35_1_16": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 55840 }, "block_controlnet_hidden_states_7.out_35_1_18": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 61984 }, "block_controlnet_hidden_states_8.out_35_1_20": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 68128 }, "block_controlnet_hidden_states_9.out_35_1_22": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 74272 }, "block_controlnet_hidden_states_10.out_35_1_24": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 80416 }, "block_controlnet_hidden_states_11.out_35_1_26": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 83488 }, "/Reshape_output_0.out17_0_0": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 1, 2, 2, 16 ], "size_in_bytes": 128, "op_tensor_size": 128, "dynamic_shapes": [ "batch_size", "floor(w/2)", "floor(h/2)", "False", "False", "False" ], "offset": 0 }, "/pos_embed/Transpose_output_0.out5_0_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 0 }, "/pos_embed/Add_2_output_0.out_35_1_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 3072 }, "/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 6144 }, "/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 9216 }, "/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 12288 }, "/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 15360 }, "/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 18432 }, "/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 21504 }, "/time_text_embed/Add_output_0.out_35_1_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 24576 }, "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 27648 }, "encoder_hidden_states.out17_3_0_bfp.out25_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 4096 ], "size_in_bytes": 4096, "op_tensor_size": 4096, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 30720 }, "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 34816 }, "/context_embedder/Add_output_0.out17_3_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 36352 }, "/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 39424 }, "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 40960 }, "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 42496 }, "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_94": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 44032 }, "/transformer_blocks.0/attn/Concat_output_0.out22_0_bfp.out23_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 45568 }, "/transformer_blocks.0/attn/Concat_1_output_0.out22_0_bfp.out23_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 47104 }, "/transformer_blocks.0/attn/Concat_2_output_0.out22_0_bfp.out23_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 48640 }, "/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 50176 }, "/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 51712 }, "/transformer_blocks.0/Add_4_output_0.out10_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 53248 }, "/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 56320 }, "/transformer_blocks.0/Add_6_output_0.out0_0_3_bfp.out1_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 57856 }, "/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 59392 }, "/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_12_bfp.out25_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 65536 }, "/transformer_blocks.0/Add_7_output_0.out10_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 67072 }, "/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 70144 }, "/transformer_blocks.0/Add_output_0.out10_92": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 71680 }, "/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 74752 }, "/transformer_blocks.0/Add_2_output_0.out0_0_2_bfp.out1_95": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 76288 }, "/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 77824 }, "/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_11_bfp.out25_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 83968 }, "/transformer_blocks.0/Add_3_output_0.out10_93": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 85504 }, "/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_4_bfp.out15_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 88576 }, "/Add_output_0.out_35_1_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 90112 }, "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_5_bfp.out15_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 93184 }, "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_5_bfp.out1_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 94720 }, "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_4_bfp.out1_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 96256 }, "/transformer_blocks.1/attn/Concat_output_0.out22_1_bfp.out23_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 97792 }, "/transformer_blocks.1/attn/Concat_1_output_0.out22_1_bfp.out23_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 99328 }, "/transformer_blocks.1/attn/Concat_2_output_0.out22_1_bfp.out23_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 100864 }, "/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 102400 }, "/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 103936 }, "/transformer_blocks.1/Add_output_0.out10_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 105472 }, "/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 108544 }, "/transformer_blocks.1/Add_2_output_0.out0_0_6_bfp.out1_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 110080 }, "/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 111616 }, "/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_19_bfp.out25_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 117760 }, "/transformer_blocks.1/Add_3_output_0.out10_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 119296 }, "/Add_1_output_0.out_35_1_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 122368 }, "/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 125440 }, "/transformer_blocks.1/Add_4_output_0.out10_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 126976 }, "/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 130048 }, "/transformer_blocks.1/Add_6_output_0.out0_0_7_bfp.out1_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 131584 }, "/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 133120 }, "/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_20_bfp.out25_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 139264 }, "/transformer_blocks.1/Add_7_output_0.out10_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 140800 }, "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_9_bfp.out15_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 143872 }, "/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_8_bfp.out15_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 145408 }, "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_9_bfp.out1_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 146944 }, "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_8_bfp.out1_48": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 148480 }, "/transformer_blocks.2/attn/Concat_output_0.out22_2_bfp.out23_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 150016 }, "/transformer_blocks.2/attn/Concat_1_output_0.out22_2_bfp.out23_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 151552 }, "/transformer_blocks.2/attn/Concat_2_output_0.out22_2_bfp.out23_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 153088 }, "/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 154624 }, "/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 156160 }, "/transformer_blocks.2/Add_4_output_0.out10_48": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 157696 }, "/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 160768 }, "/transformer_blocks.2/Add_output_0.out10_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 162304 }, "/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 165376 }, "/transformer_blocks.2/Add_2_output_0.out0_0_10_bfp.out1_47": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 166912 }, "/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 168448 }, "/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_27_bfp.out25_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 174592 }, "/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 176128 }, "/transformer_blocks.2/Add_6_output_0.out0_0_11_bfp.out1_49": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 177664 }, "/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 179200 }, "/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_28_bfp.out25_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 185344 }, "/transformer_blocks.2/Add_7_output_0.out10_49": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 186880 }, "/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_12_bfp.out15_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 189952 }, "/transformer_blocks.2/Add_3_output_0.out10_47": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 191488 }, "/Add_2_output_0.out_35_1_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 194560 }, "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_13_bfp.out15_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 197632 }, "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_13_bfp.out1_65": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 199168 }, "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_12_bfp.out1_67": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 200704 }, "/transformer_blocks.3/attn/Concat_output_0.out22_3_bfp.out23_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 202240 }, "/transformer_blocks.3/attn/Concat_1_output_0.out22_3_bfp.out23_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 203776 }, "/transformer_blocks.3/attn/Concat_2_output_0.out22_3_bfp.out23_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 205312 }, "/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 206848 }, "/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 208384 }, "/transformer_blocks.3/Add_4_output_0.out10_66": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 209920 }, "/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 212992 }, "/transformer_blocks.3/Add_output_0.out10_64": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 214528 }, "/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 217600 }, "/transformer_blocks.3/Add_2_output_0.out0_0_14_bfp.out1_66": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 219136 }, "/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 220672 }, "/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_35_bfp.out25_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 226816 }, "/transformer_blocks.3/Add_3_output_0.out10_65": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 228352 }, "/Add_3_output_0.out_35_1_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 231424 }, "/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 234496 }, "/transformer_blocks.3/Add_6_output_0.out0_0_15_bfp.out1_68": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 236032 }, "/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 237568 }, "/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_36_bfp.out25_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 243712 }, "/transformer_blocks.3/Add_7_output_0.out10_67": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 245248 }, "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_17_bfp.out15_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 248320 }, "/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_16_bfp.out15_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 249856 }, "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_17_bfp.out1_69": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 251392 }, "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_16_bfp.out1_71": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 252928 }, "/transformer_blocks.4/attn/Concat_output_0.out22_4_bfp.out23_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 254464 }, "/transformer_blocks.4/attn/Concat_1_output_0.out22_4_bfp.out23_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 256000 }, "/transformer_blocks.4/attn/Concat_2_output_0.out22_4_bfp.out23_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 257536 }, "/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 259072 }, "/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 260608 }, "/transformer_blocks.4/Add_4_output_0.out10_70": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 262144 }, "/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 265216 }, "/transformer_blocks.4/Add_output_0.out10_68": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 266752 }, "/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 269824 }, "/transformer_blocks.4/Add_2_output_0.out0_0_18_bfp.out1_70": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 271360 }, "/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 272896 }, "/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_43_bfp.out25_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 279040 }, "/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 280576 }, "/transformer_blocks.4/Add_6_output_0.out0_0_19_bfp.out1_72": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 282112 }, "/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 283648 }, "/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_44_bfp.out25_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 289792 }, "/transformer_blocks.4/Add_7_output_0.out10_71": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 291328 }, "/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_20_bfp.out15_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 294400 }, "/transformer_blocks.4/Add_3_output_0.out10_69": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 295936 }, "/Add_4_output_0.out_35_1_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 299008 }, "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_21_bfp.out15_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 302080 }, "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_21_bfp.out1_73": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 303616 }, "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_20_bfp.out1_75": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 305152 }, "/transformer_blocks.5/attn/Concat_output_0.out22_5_bfp.out23_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 306688 }, "/transformer_blocks.5/attn/Concat_1_output_0.out22_5_bfp.out23_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 308224 }, "/transformer_blocks.5/attn/Concat_2_output_0.out22_5_bfp.out23_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 309760 }, "/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 311296 }, "/transformer_blocks.5/attn/to_add_out/Add_output_0.out6_1_11_bfp.out7_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 312832 }, "/transformer_blocks.5/Add_4_output_0.out10_74": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 314368 }, "/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 317440 }, "/transformer_blocks.5/Add_output_0.out10_72": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 318976 }, "/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 322048 }, "/transformer_blocks.5/Add_2_output_0.out0_0_22_bfp.out1_74": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 323584 }, "/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 325120 }, "/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_51_bfp.out25_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 331264 }, "/transformer_blocks.5/Add_3_output_0.out10_73": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 332800 }, "/Add_5_output_0.out_35_1_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 335872 }, "/transformer_blocks.5/norm2_context/LayerNormalization_output_0.out14_23_bfp.out15_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 338944 }, "/transformer_blocks.5/Add_6_output_0.out0_0_23_bfp.out1_76": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 340480 }, "/transformer_blocks.5/ff_context/net.0/Mul_5_output_0.out17_2_11_bfp.out25_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 342016 }, "/transformer_blocks.5/ff_context/net.2/Add_output_0.out17_3_52_bfp.out25_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 348160 }, "/transformer_blocks.5/Add_7_output_0.out10_75": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 349696 }, "/transformer_blocks.6/norm1/norm/LayerNormalization_output_0.out14_25_bfp.out15_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 352768 }, "/transformer_blocks.6/norm1_context/norm/LayerNormalization_output_0.out14_24_bfp.out15_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 354304 }, "/transformer_blocks.6/norm1/Add_2_output_0.out0_0_25_bfp.out1_77": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 355840 }, "/transformer_blocks.6/norm1_context/Add_2_output_0.out0_0_24_bfp.out1_79": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 357376 }, "/transformer_blocks.6/attn/Concat_output_0.out22_6_bfp.out23_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 358912 }, "/transformer_blocks.6/attn/Concat_1_output_0.out22_6_bfp.out23_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 360448 }, "/transformer_blocks.6/attn/Concat_2_output_0.out22_6_bfp.out23_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 361984 }, "/transformer_blocks.6/attn/Reshape_3_output_0.out22_6_bfp.out27_0_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 363520 }, "/transformer_blocks.6/attn/to_add_out/Add_output_0.out6_1_13_bfp.out7_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 365056 }, "/transformer_blocks.6/Add_4_output_0.out10_78": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 366592 }, "/transformer_blocks.6/attn/to_out.0/Add_output_0.out6_1_12_bfp.out7_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 369664 }, "/transformer_blocks.6/Add_output_0.out10_76": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 371200 }, "/transformer_blocks.6/norm2/LayerNormalization_output_0.out14_26_bfp.out15_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 374272 }, "/transformer_blocks.6/Add_2_output_0.out0_0_26_bfp.out1_78": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 375808 }, "/transformer_blocks.6/ff/net.0/Mul_5_output_0.out17_2_12_bfp.out25_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 377344 }, "/transformer_blocks.6/ff/net.2/Add_output_0.out17_3_59_bfp.out25_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 383488 }, "/transformer_blocks.6/norm2_context/LayerNormalization_output_0.out14_27_bfp.out15_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 385024 }, "/transformer_blocks.6/Add_6_output_0.out0_0_27_bfp.out1_80": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 386560 }, "/transformer_blocks.6/ff_context/net.0/Mul_5_output_0.out17_2_13_bfp.out25_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 388096 }, "/transformer_blocks.6/ff_context/net.2/Add_output_0.out17_3_60_bfp.out25_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 394240 }, "/transformer_blocks.6/Add_7_output_0.out10_79": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 395776 }, "/transformer_blocks.7/norm1_context/norm/LayerNormalization_output_0.out14_28_bfp.out15_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 398848 }, "/transformer_blocks.6/Add_3_output_0.out10_77": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 400384 }, "/Add_6_output_0.out_35_1_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 403456 }, "/transformer_blocks.7/norm1/norm/LayerNormalization_output_0.out14_29_bfp.out15_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 406528 }, "/transformer_blocks.7/norm1/Add_2_output_0.out0_0_29_bfp.out1_81": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 408064 }, "/transformer_blocks.7/norm1_context/Add_2_output_0.out0_0_28_bfp.out1_83": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 409600 }, "/transformer_blocks.7/attn/Concat_output_0.out22_7_bfp.out23_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 411136 }, "/transformer_blocks.7/attn/Concat_1_output_0.out22_7_bfp.out23_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 412672 }, "/transformer_blocks.7/attn/Concat_2_output_0.out22_7_bfp.out23_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 414208 }, "/transformer_blocks.7/attn/Reshape_3_output_0.out22_7_bfp.out27_0_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 415744 }, "/transformer_blocks.7/attn/to_add_out/Add_output_0.out6_1_15_bfp.out7_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 417280 }, "/transformer_blocks.7/Add_4_output_0.out10_82": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 418816 }, "/transformer_blocks.7/attn/to_out.0/Add_output_0.out6_1_14_bfp.out7_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 421888 }, "/transformer_blocks.7/Add_output_0.out10_80": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 423424 }, "/transformer_blocks.7/norm2/LayerNormalization_output_0.out14_30_bfp.out15_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 426496 }, "/transformer_blocks.7/Add_2_output_0.out0_0_30_bfp.out1_82": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 428032 }, "/transformer_blocks.7/ff/net.0/Mul_5_output_0.out17_2_14_bfp.out25_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 429568 }, "/transformer_blocks.7/ff/net.2/Add_output_0.out17_3_67_bfp.out25_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 435712 }, "/transformer_blocks.7/Add_3_output_0.out10_81": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 437248 }, "/Add_7_output_0.out_35_1_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 440320 }, "/transformer_blocks.7/norm2_context/LayerNormalization_output_0.out14_31_bfp.out15_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 443392 }, "/transformer_blocks.7/Add_6_output_0.out0_0_31_bfp.out1_84": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 444928 }, "/transformer_blocks.7/ff_context/net.0/Mul_5_output_0.out17_2_15_bfp.out25_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 446464 }, "/transformer_blocks.7/ff_context/net.2/Add_output_0.out17_3_68_bfp.out25_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 452608 }, "/transformer_blocks.7/Add_7_output_0.out10_83": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 454144 }, "/transformer_blocks.8/norm1/norm/LayerNormalization_output_0.out14_33_bfp.out15_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 457216 }, "/transformer_blocks.8/norm1_context/norm/LayerNormalization_output_0.out14_32_bfp.out15_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 458752 }, "/transformer_blocks.8/norm1/Add_2_output_0.out0_0_33_bfp.out1_85": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 460288 }, "/transformer_blocks.8/norm1_context/Add_2_output_0.out0_0_32_bfp.out1_87": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 461824 }, "/transformer_blocks.8/attn/Concat_output_0.out22_8_bfp.out23_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 463360 }, "/transformer_blocks.8/attn/Concat_1_output_0.out22_8_bfp.out23_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 464896 }, "/transformer_blocks.8/attn/Concat_2_output_0.out22_8_bfp.out23_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 466432 }, "/transformer_blocks.8/attn/Reshape_3_output_0.out22_8_bfp.out27_0_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 467968 }, "/transformer_blocks.8/attn/to_add_out/Add_output_0.out6_1_17_bfp.out7_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 469504 }, "/transformer_blocks.8/Add_4_output_0.out10_86": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 471040 }, "/transformer_blocks.8/attn/to_out.0/Add_output_0.out6_1_16_bfp.out7_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 474112 }, "/transformer_blocks.8/Add_output_0.out10_84": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 475648 }, "/transformer_blocks.8/norm2/LayerNormalization_output_0.out14_34_bfp.out15_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 478720 }, "/transformer_blocks.8/Add_2_output_0.out0_0_34_bfp.out1_86": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 480256 }, "/transformer_blocks.8/ff/net.0/Mul_5_output_0.out17_2_16_bfp.out25_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 481792 }, "/transformer_blocks.8/ff/net.2/Add_output_0.out17_3_75_bfp.out25_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 487936 }, "/transformer_blocks.8/norm2_context/LayerNormalization_output_0.out14_35_bfp.out15_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 489472 }, "/transformer_blocks.8/Add_6_output_0.out0_0_35_bfp.out1_88": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 491008 }, "/transformer_blocks.8/ff_context/net.0/Mul_5_output_0.out17_2_17_bfp.out25_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 492544 }, "/transformer_blocks.8/ff_context/net.2/Add_output_0.out17_3_76_bfp.out25_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 498688 }, "/transformer_blocks.8/Add_7_output_0.out10_87": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 500224 }, "/transformer_blocks.9/norm1_context/norm/LayerNormalization_output_0.out14_36_bfp.out15_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 503296 }, "/transformer_blocks.8/Add_3_output_0.out10_85": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 504832 }, "/Add_8_output_0.out_35_1_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 507904 }, "/transformer_blocks.9/norm1/norm/LayerNormalization_output_0.out14_37_bfp.out15_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 510976 }, "/transformer_blocks.9/norm1/Add_2_output_0.out0_0_37_bfp.out1_89": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 512512 }, "/transformer_blocks.9/norm1_context/Add_2_output_0.out0_0_36_bfp.out1_91": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 514048 }, "/transformer_blocks.9/attn/Concat_output_0.out22_9_bfp.out23_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 515584 }, "/transformer_blocks.9/attn/Concat_1_output_0.out22_9_bfp.out23_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 517120 }, "/transformer_blocks.9/attn/Concat_2_output_0.out22_9_bfp.out23_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 518656 }, "/transformer_blocks.9/attn/Reshape_3_output_0.out22_9_bfp.out27_0_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 520192 }, "/transformer_blocks.9/attn/to_add_out/Add_output_0.out6_1_19_bfp.out7_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 521728 }, "/transformer_blocks.9/Add_4_output_0.out10_90": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 523264 }, "/transformer_blocks.9/attn/to_out.0/Add_output_0.out6_1_18_bfp.out7_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 526336 }, "/transformer_blocks.9/Add_output_0.out10_88": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 527872 }, "/transformer_blocks.9/norm2/LayerNormalization_output_0.out14_38_bfp.out15_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 530944 }, "/transformer_blocks.9/Add_2_output_0.out0_0_38_bfp.out1_90": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 532480 }, "/transformer_blocks.9/ff/net.0/Mul_5_output_0.out17_2_18_bfp.out25_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 534016 }, "/transformer_blocks.9/ff/net.2/Add_output_0.out17_3_83_bfp.out25_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 540160 }, "/transformer_blocks.9/Add_3_output_0.out10_89": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 541696 }, "/Add_9_output_0.out_35_1_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 544768 }, "/transformer_blocks.10/norm1/norm/LayerNormalization_output_0.out14_40_bfp.out15_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 547840 }, "/transformer_blocks.10/norm1/Add_2_output_0.out0_0_40_bfp.out1_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 549376 }, "/transformer_blocks.9/norm2_context/LayerNormalization_output_0.out14_39_bfp.out15_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 550912 }, "/transformer_blocks.9/Add_6_output_0.out0_0_39_bfp.out1_92": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 552448 }, "/transformer_blocks.9/ff_context/net.0/Mul_5_output_0.out17_2_19_bfp.out25_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 553984 }, "/transformer_blocks.9/ff_context/net.2/Add_output_0.out17_3_84_bfp.out25_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 560128 }, "/transformer_blocks.9/Add_7_output_0.out10_91": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 561664 }, "/transformer_blocks.10/norm1_context/norm/LayerNormalization_output_0.out14_41_bfp.out15_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 564736 }, "/transformer_blocks.10/norm1_context/Add_2_output_0.out0_0_41_bfp.out1_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 566272 }, "/transformer_blocks.10/attn/Concat_output_0.out22_10_bfp.out23_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 567808 }, "/transformer_blocks.10/attn/Concat_1_output_0.out22_10_bfp.out23_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 569344 }, "/transformer_blocks.10/attn/Concat_2_output_0.out22_10_bfp.out23_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 570880 }, "/transformer_blocks.10/attn/Reshape_3_output_0.out22_10_bfp.out27_0_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 572416 }, "/transformer_blocks.10/attn/to_out.0/Add_output_0.out6_1_20_bfp.out7_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 573952 }, "/transformer_blocks.10/Add_output_0.out10_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 575488 }, "/transformer_blocks.10/norm2/LayerNormalization_output_0.out14_42_bfp.out15_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 578560 }, "/transformer_blocks.10/Add_2_output_0.out0_0_42_bfp.out1_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 580096 }, "/transformer_blocks.10/ff/net.0/Mul_5_output_0.out17_2_20_bfp.out25_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 581632 }, "/transformer_blocks.10/ff/net.2/Add_output_0.out17_3_91_bfp.out25_47": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 587776 }, "/transformer_blocks.10/Add_3_output_0.out10_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 589312 }, "/transformer_blocks.10/attn/to_add_out/Add_output_0.out6_1_21_bfp.out7_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 592384 }, "/transformer_blocks.10/Add_4_output_0.out10_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 593920 }, "/transformer_blocks.10/norm2_context/LayerNormalization_output_0.out14_43_bfp.out15_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 596992 }, "/transformer_blocks.10/Add_6_output_0.out0_0_43_bfp.out1_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 598528 }, "/transformer_blocks.10/ff_context/net.0/Mul_5_output_0.out17_2_21_bfp.out25_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 600064 }, "/transformer_blocks.10/ff_context/net.2/Add_output_0.out17_3_92_bfp.out25_48": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 606208 }, "/transformer_blocks.10/Add_7_output_0.out10_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 607744 }, "/transformer_blocks.11/norm1_context/norm/LayerNormalization_output_0.out14_44_bfp.out15_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 610816 }, "/Add_10_output_0.out_35_1_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 612352 }, "/transformer_blocks.11/norm1/norm/LayerNormalization_output_0.out14_45_bfp.out15_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 615424 }, "/transformer_blocks.11/norm1/Add_2_output_0.out0_0_45_bfp.out1_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 616960 }, "/transformer_blocks.11/norm1_context/Add_2_output_0.out0_0_44_bfp.out1_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 618496 }, "/transformer_blocks.11/attn/Concat_output_0.out22_11_bfp.out23_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 620032 }, "/transformer_blocks.11/attn/Concat_1_output_0.out22_11_bfp.out23_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 621568 }, "/transformer_blocks.11/attn/Concat_2_output_0.out22_11_bfp.out23_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 623104 }, "/transformer_blocks.11/attn/Reshape_3_output_0.out22_11_bfp.out27_0_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 624640 }, "/transformer_blocks.11/attn/to_out.0/Add_output_0.out6_1_22_bfp.out7_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 626176 }, "/transformer_blocks.11/Add_output_0.out10_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 627712 }, "/transformer_blocks.11/norm2/LayerNormalization_output_0.out14_46_bfp.out15_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 630784 }, "/transformer_blocks.11/Add_2_output_0.out0_0_46_bfp.out1_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 632320 }, "/transformer_blocks.11/ff/net.0/Mul_5_output_0.out17_2_22_bfp.out25_49": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 633856 }, "/transformer_blocks.11/ff/net.2/Add_output_0.out17_3_99_bfp.out25_51": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 640000 }, "/transformer_blocks.11/Add_3_output_0.out10_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 641536 }, "/Add_11_output_0.out_35_1_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 644608 }, "/transformer_blocks.11/attn/to_add_out/Add_output_0.out6_1_23_bfp.out7_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 647680 }, "/transformer_blocks.11/Add_4_output_0.out10_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 649216 }, "/transformer_blocks.11/norm2_context/LayerNormalization_output_0.out14_47_bfp.out15_47": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 652288 }, "/transformer_blocks.11/Add_6_output_0.out0_0_47_bfp.out1_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 653824 }, "/transformer_blocks.11/ff_context/net.0/Mul_5_output_0.out17_2_23_bfp.out25_50": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 655360 }, "/transformer_blocks.11/ff_context/net.2/Add_output_0.out17_3_100_bfp.out25_52": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 661504 }, "/transformer_blocks.11/Add_7_output_0.out10_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 663040 }, "/transformer_blocks.12/norm1/norm/LayerNormalization_output_0.out14_49_bfp.out15_49": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 666112 }, "/transformer_blocks.12/norm1/Add_2_output_0.out0_0_49_bfp.out1_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 667648 }, "/transformer_blocks.12/norm1_context/norm/LayerNormalization_output_0.out14_48_bfp.out15_48": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 669184 }, "/transformer_blocks.12/norm1_context/Add_2_output_0.out0_0_48_bfp.out1_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 670720 }, "/transformer_blocks.12/attn/Concat_output_0.out22_12_bfp.out23_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 672256 }, "/transformer_blocks.12/attn/Concat_1_output_0.out22_12_bfp.out23_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 673792 }, "/transformer_blocks.12/attn/Concat_2_output_0.out22_12_bfp.out23_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 675328 }, "/transformer_blocks.12/attn/Reshape_3_output_0.out22_12_bfp.out27_0_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 676864 }, "/transformer_blocks.12/attn/to_out.0/Add_output_0.out6_1_24_bfp.out7_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 678400 }, "/transformer_blocks.12/Add_output_0.out10_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 679936 }, "/transformer_blocks.12/norm2/LayerNormalization_output_0.out14_50_bfp.out15_50": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 683008 }, "/transformer_blocks.12/Add_2_output_0.out0_0_50_bfp.out1_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 684544 }, "/transformer_blocks.12/ff/net.0/Mul_5_output_0.out17_2_24_bfp.out25_53": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 686080 }, "/transformer_blocks.12/ff/net.2/Add_output_0.out17_3_107_bfp.out25_55": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 692224 }, "/transformer_blocks.12/Add_3_output_0.out10_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 693760 }, "/transformer_blocks.12/attn/to_add_out/Add_output_0.out6_1_25_bfp.out7_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 696832 }, "/transformer_blocks.12/Add_4_output_0.out10_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 698368 }, "/transformer_blocks.12/norm2_context/LayerNormalization_output_0.out14_51_bfp.out15_51": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 701440 }, "/transformer_blocks.12/Add_6_output_0.out0_0_51_bfp.out1_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 702976 }, "/transformer_blocks.12/ff_context/net.0/Mul_5_output_0.out17_2_25_bfp.out25_54": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 704512 }, "/transformer_blocks.12/ff_context/net.2/Add_output_0.out17_3_108_bfp.out25_56": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 710656 }, "/transformer_blocks.12/Add_7_output_0.out10_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 712192 }, "/transformer_blocks.13/norm1_context/norm/LayerNormalization_output_0.out14_52_bfp.out15_52": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 715264 }, "/Add_12_output_0.out_35_1_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 716800 }, "/transformer_blocks.13/norm1/norm/LayerNormalization_output_0.out14_53_bfp.out15_53": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 719872 }, "/transformer_blocks.13/norm1/Add_2_output_0.out0_0_53_bfp.out1_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 721408 }, "/transformer_blocks.13/norm1_context/Add_2_output_0.out0_0_52_bfp.out1_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 722944 }, "/transformer_blocks.13/attn/Concat_output_0.out22_13_bfp.out23_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 724480 }, "/transformer_blocks.13/attn/Concat_1_output_0.out22_13_bfp.out23_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 726016 }, "/transformer_blocks.13/attn/Concat_2_output_0.out22_13_bfp.out23_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 727552 }, "/transformer_blocks.13/attn/Reshape_3_output_0.out22_13_bfp.out27_0_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 729088 }, "/transformer_blocks.13/attn/to_out.0/Add_output_0.out6_1_26_bfp.out7_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 730624 }, "/transformer_blocks.13/Add_output_0.out10_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 732160 }, "/transformer_blocks.13/norm2/LayerNormalization_output_0.out14_54_bfp.out15_54": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 735232 }, "/transformer_blocks.13/Add_2_output_0.out0_0_54_bfp.out1_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 736768 }, "/transformer_blocks.13/ff/net.0/Mul_5_output_0.out17_2_26_bfp.out25_57": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 738304 }, "/transformer_blocks.13/ff/net.2/Add_output_0.out17_3_115_bfp.out25_59": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 744448 }, "/transformer_blocks.13/Add_3_output_0.out10_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 745984 }, "/Add_13_output_0.out_35_1_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 749056 }, "/transformer_blocks.13/attn/to_add_out/Add_output_0.out6_1_27_bfp.out7_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 752128 }, "/transformer_blocks.13/Add_4_output_0.out10_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 753664 }, "/transformer_blocks.13/norm2_context/LayerNormalization_output_0.out14_55_bfp.out15_55": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 756736 }, "/transformer_blocks.13/Add_6_output_0.out0_0_55_bfp.out1_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 758272 }, "/transformer_blocks.13/ff_context/net.0/Mul_5_output_0.out17_2_27_bfp.out25_58": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 759808 }, "/transformer_blocks.13/ff_context/net.2/Add_output_0.out17_3_116_bfp.out25_60": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 765952 }, "/transformer_blocks.13/Add_7_output_0.out10_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 767488 }, "/transformer_blocks.14/norm1/norm/LayerNormalization_output_0.out14_57_bfp.out15_57": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 770560 }, "/transformer_blocks.14/norm1/Add_2_output_0.out0_0_57_bfp.out1_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 772096 }, "/transformer_blocks.14/norm1_context/norm/LayerNormalization_output_0.out14_56_bfp.out15_56": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 773632 }, "/transformer_blocks.14/norm1_context/Add_2_output_0.out0_0_56_bfp.out1_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 775168 }, "/transformer_blocks.14/attn/Concat_output_0.out22_14_bfp.out23_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 776704 }, "/transformer_blocks.14/attn/Concat_1_output_0.out22_14_bfp.out23_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 778240 }, "/transformer_blocks.14/attn/Concat_2_output_0.out22_14_bfp.out23_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 779776 }, "/transformer_blocks.14/attn/Reshape_3_output_0.out22_14_bfp.out27_0_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 781312 }, "/transformer_blocks.14/attn/to_out.0/Add_output_0.out6_1_28_bfp.out7_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 782848 }, "/transformer_blocks.14/Add_output_0.out10_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 784384 }, "/transformer_blocks.14/norm2/LayerNormalization_output_0.out14_58_bfp.out15_58": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 787456 }, "/transformer_blocks.14/Add_2_output_0.out0_0_58_bfp.out1_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 788992 }, "/transformer_blocks.14/ff/net.0/Mul_5_output_0.out17_2_28_bfp.out25_61": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 790528 }, "/transformer_blocks.14/ff/net.2/Add_output_0.out17_3_123_bfp.out25_63": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 796672 }, "/transformer_blocks.14/Add_3_output_0.out10_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 798208 }, "/transformer_blocks.14/attn/to_add_out/Add_output_0.out6_1_29_bfp.out7_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 801280 }, "/transformer_blocks.14/Add_4_output_0.out10_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 802816 }, "/transformer_blocks.14/norm2_context/LayerNormalization_output_0.out14_59_bfp.out15_59": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 805888 }, "/transformer_blocks.14/Add_6_output_0.out0_0_59_bfp.out1_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 807424 }, "/transformer_blocks.14/ff_context/net.0/Mul_5_output_0.out17_2_29_bfp.out25_62": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 808960 }, "/transformer_blocks.14/ff_context/net.2/Add_output_0.out17_3_124_bfp.out25_64": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 815104 }, "/transformer_blocks.14/Add_7_output_0.out10_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 816640 }, "/transformer_blocks.15/norm1_context/norm/LayerNormalization_output_0.out14_60_bfp.out15_60": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 819712 }, "/Add_14_output_0.out_35_1_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 821248 }, "/transformer_blocks.15/norm1/norm/LayerNormalization_output_0.out14_61_bfp.out15_61": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 824320 }, "/transformer_blocks.15/norm1/Add_2_output_0.out0_0_61_bfp.out1_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 825856 }, "/transformer_blocks.15/norm1_context/Add_2_output_0.out0_0_60_bfp.out1_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 827392 }, "/transformer_blocks.15/attn/Concat_output_0.out22_15_bfp.out23_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 828928 }, "/transformer_blocks.15/attn/Concat_1_output_0.out22_15_bfp.out23_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 830464 }, "/transformer_blocks.15/attn/Concat_2_output_0.out22_15_bfp.out23_47": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 832000 }, "/transformer_blocks.15/attn/Reshape_3_output_0.out22_15_bfp.out27_0_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 833536 }, "/transformer_blocks.15/attn/to_out.0/Add_output_0.out6_1_30_bfp.out7_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 835072 }, "/transformer_blocks.15/Add_output_0.out10_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 836608 }, "/transformer_blocks.15/norm2/LayerNormalization_output_0.out14_62_bfp.out15_62": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 839680 }, "/transformer_blocks.15/Add_2_output_0.out0_0_62_bfp.out1_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 841216 }, "/transformer_blocks.15/ff/net.0/Mul_5_output_0.out17_2_30_bfp.out25_65": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 842752 }, "/transformer_blocks.15/ff/net.2/Add_output_0.out17_3_131_bfp.out25_67": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 848896 }, "/transformer_blocks.15/Add_3_output_0.out10_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 850432 }, "/Add_15_output_0.out_35_1_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 853504 }, "/transformer_blocks.15/attn/to_add_out/Add_output_0.out6_1_31_bfp.out7_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 856576 }, "/transformer_blocks.15/Add_4_output_0.out10_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 858112 }, "/transformer_blocks.15/norm2_context/LayerNormalization_output_0.out14_63_bfp.out15_63": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 861184 }, "/transformer_blocks.15/Add_6_output_0.out0_0_63_bfp.out1_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 862720 }, "/transformer_blocks.15/ff_context/net.0/Mul_5_output_0.out17_2_31_bfp.out25_66": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 864256 }, "/transformer_blocks.15/ff_context/net.2/Add_output_0.out17_3_132_bfp.out25_68": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 870400 }, "/transformer_blocks.15/Add_7_output_0.out10_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 871936 }, "/transformer_blocks.16/norm1/norm/LayerNormalization_output_0.out14_65_bfp.out15_65": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 875008 }, "/transformer_blocks.16/norm1/Add_2_output_0.out0_0_65_bfp.out1_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 876544 }, "/transformer_blocks.16/norm1_context/norm/LayerNormalization_output_0.out14_64_bfp.out15_64": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 878080 }, "/transformer_blocks.16/norm1_context/Add_2_output_0.out0_0_64_bfp.out1_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 879616 }, "/transformer_blocks.16/attn/Concat_output_0.out22_16_bfp.out23_48": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 881152 }, "/transformer_blocks.16/attn/Concat_1_output_0.out22_16_bfp.out23_49": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 882688 }, "/transformer_blocks.16/attn/Concat_2_output_0.out22_16_bfp.out23_50": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 884224 }, "/transformer_blocks.16/attn/Reshape_3_output_0.out22_16_bfp.out27_0_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 885760 }, "/transformer_blocks.16/attn/to_out.0/Add_output_0.out6_1_32_bfp.out7_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 887296 }, "/transformer_blocks.16/Add_output_0.out10_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 888832 }, "/transformer_blocks.16/norm2/LayerNormalization_output_0.out14_66_bfp.out15_66": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 891904 }, "/transformer_blocks.16/Add_2_output_0.out0_0_66_bfp.out1_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 893440 }, "/transformer_blocks.16/ff/net.0/Mul_5_output_0.out17_2_32_bfp.out25_69": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 894976 }, "/transformer_blocks.16/ff/net.2/Add_output_0.out17_3_139_bfp.out25_71": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 901120 }, "/transformer_blocks.16/Add_3_output_0.out10_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 902656 }, "/transformer_blocks.16/attn/to_add_out/Add_output_0.out6_1_33_bfp.out7_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 905728 }, "/transformer_blocks.16/Add_4_output_0.out10_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 907264 }, "/transformer_blocks.16/norm2_context/LayerNormalization_output_0.out14_67_bfp.out15_67": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 910336 }, "/transformer_blocks.16/Add_6_output_0.out0_0_67_bfp.out1_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 911872 }, "/transformer_blocks.16/ff_context/net.0/Mul_5_output_0.out17_2_33_bfp.out25_70": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 913408 }, "/transformer_blocks.16/ff_context/net.2/Add_output_0.out17_3_140_bfp.out25_72": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 919552 }, "/transformer_blocks.16/Add_7_output_0.out10_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 921088 }, "/transformer_blocks.17/norm1_context/norm/LayerNormalization_output_0.out14_68_bfp.out15_68": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 924160 }, "/Add_16_output_0.out_35_1_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 925696 }, "/transformer_blocks.17/norm1/norm/LayerNormalization_output_0.out14_69_bfp.out15_69": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 928768 }, "/transformer_blocks.17/norm1/Add_2_output_0.out0_0_69_bfp.out1_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 930304 }, "/transformer_blocks.17/norm1_context/Add_2_output_0.out0_0_68_bfp.out1_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 931840 }, "/transformer_blocks.17/attn/Concat_output_0.out22_17_bfp.out23_51": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 933376 }, "/transformer_blocks.17/attn/Concat_1_output_0.out22_17_bfp.out23_52": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 934912 }, "/transformer_blocks.17/attn/Concat_2_output_0.out22_17_bfp.out23_53": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 936448 }, "/transformer_blocks.17/attn/Reshape_3_output_0.out22_17_bfp.out27_0_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 937984 }, "/transformer_blocks.17/attn/to_out.0/Add_output_0.out6_1_34_bfp.out7_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 939520 }, "/transformer_blocks.17/Add_output_0.out10_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 941056 }, "/transformer_blocks.17/norm2/LayerNormalization_output_0.out14_70_bfp.out15_70": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 944128 }, "/transformer_blocks.17/Add_2_output_0.out0_0_70_bfp.out1_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 945664 }, "/transformer_blocks.17/ff/net.0/Mul_5_output_0.out17_2_34_bfp.out25_73": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 947200 }, "/transformer_blocks.17/ff/net.2/Add_output_0.out17_3_147_bfp.out25_75": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 953344 }, "/transformer_blocks.17/Add_3_output_0.out10_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 954880 }, "/Add_17_output_0.out_35_1_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 957952 }, "/transformer_blocks.17/attn/to_add_out/Add_output_0.out6_1_35_bfp.out7_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 961024 }, "/transformer_blocks.17/Add_4_output_0.out10_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 962560 }, "/transformer_blocks.17/norm2_context/LayerNormalization_output_0.out14_71_bfp.out15_71": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 965632 }, "/transformer_blocks.17/Add_6_output_0.out0_0_71_bfp.out1_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 967168 }, "/transformer_blocks.17/ff_context/net.0/Mul_5_output_0.out17_2_35_bfp.out25_74": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 968704 }, "/transformer_blocks.17/ff_context/net.2/Add_output_0.out17_3_148_bfp.out25_76": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 974848 }, "/transformer_blocks.17/Add_7_output_0.out10_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 976384 }, "/transformer_blocks.18/norm1/norm/LayerNormalization_output_0.out14_73_bfp.out15_73": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 979456 }, "/transformer_blocks.18/norm1/Add_2_output_0.out0_0_73_bfp.out1_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 980992 }, "/transformer_blocks.18/norm1_context/norm/LayerNormalization_output_0.out14_72_bfp.out15_72": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 982528 }, "/transformer_blocks.18/norm1_context/Add_2_output_0.out0_0_72_bfp.out1_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 984064 }, "/transformer_blocks.18/attn/Concat_output_0.out22_18_bfp.out23_54": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 985600 }, "/transformer_blocks.18/attn/Concat_1_output_0.out22_18_bfp.out23_55": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 987136 }, "/transformer_blocks.18/attn/Concat_2_output_0.out22_18_bfp.out23_56": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 988672 }, "/transformer_blocks.18/attn/Reshape_3_output_0.out22_18_bfp.out27_0_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 990208 }, "/transformer_blocks.18/attn/to_out.0/Add_output_0.out6_1_36_bfp.out7_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 991744 }, "/transformer_blocks.18/Add_output_0.out10_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 993280 }, "/transformer_blocks.18/norm2/LayerNormalization_output_0.out14_74_bfp.out15_74": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 996352 }, "/transformer_blocks.18/Add_2_output_0.out0_0_74_bfp.out1_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 997888 }, "/transformer_blocks.18/ff/net.0/Mul_5_output_0.out17_2_36_bfp.out25_77": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 999424 }, "/transformer_blocks.18/ff/net.2/Add_output_0.out17_3_155_bfp.out25_79": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1005568 }, "/transformer_blocks.18/Add_3_output_0.out10_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1007104 }, "/transformer_blocks.18/attn/to_add_out/Add_output_0.out6_1_37_bfp.out7_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1010176 }, "/transformer_blocks.18/Add_4_output_0.out10_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1011712 }, "/transformer_blocks.18/norm2_context/LayerNormalization_output_0.out14_75_bfp.out15_75": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1014784 }, "/transformer_blocks.18/Add_6_output_0.out0_0_75_bfp.out1_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1016320 }, "/transformer_blocks.18/ff_context/net.0/Mul_5_output_0.out17_2_37_bfp.out25_78": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1017856 }, "/transformer_blocks.18/ff_context/net.2/Add_output_0.out17_3_156_bfp.out25_80": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1024000 }, "/transformer_blocks.18/Add_7_output_0.out10_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1025536 }, "/transformer_blocks.19/norm1_context/norm/LayerNormalization_output_0.out14_76_bfp.out15_76": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1028608 }, "/Add_18_output_0.out_35_1_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1030144 }, "/transformer_blocks.19/norm1/norm/LayerNormalization_output_0.out14_77_bfp.out15_77": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1033216 }, "/transformer_blocks.19/norm1/Add_2_output_0.out0_0_77_bfp.out1_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1034752 }, "/transformer_blocks.19/norm1_context/Add_2_output_0.out0_0_76_bfp.out1_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1036288 }, "/transformer_blocks.19/attn/Concat_output_0.out22_19_bfp.out23_57": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 1037824 }, "/transformer_blocks.19/attn/Concat_1_output_0.out22_19_bfp.out23_58": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 1039360 }, "/transformer_blocks.19/attn/Concat_2_output_0.out22_19_bfp.out23_59": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 1040896 }, "/transformer_blocks.19/attn/Reshape_3_output_0.out22_19_bfp.out27_0_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 1042432 }, "/transformer_blocks.19/attn/to_out.0/Add_output_0.out6_1_38_bfp.out7_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1043968 }, "/transformer_blocks.19/Add_output_0.out10_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1045504 }, "/transformer_blocks.19/norm2/LayerNormalization_output_0.out14_78_bfp.out15_78": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1048576 }, "/transformer_blocks.19/Add_2_output_0.out0_0_78_bfp.out1_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1050112 }, "/transformer_blocks.19/ff/net.0/Mul_5_output_0.out17_2_38_bfp.out25_81": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1051648 }, "/transformer_blocks.19/ff/net.2/Add_output_0.out17_3_163_bfp.out25_83": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1057792 }, "/transformer_blocks.19/Add_3_output_0.out10_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1059328 }, "/Add_19_output_0.out_35_1_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1062400 }, "/transformer_blocks.19/attn/to_add_out/Add_output_0.out6_1_39_bfp.out7_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1065472 }, "/transformer_blocks.19/Add_4_output_0.out10_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1067008 }, "/transformer_blocks.19/norm2_context/LayerNormalization_output_0.out14_79_bfp.out15_79": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1070080 }, "/transformer_blocks.19/Add_6_output_0.out0_0_79_bfp.out1_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1071616 }, "/transformer_blocks.19/ff_context/net.0/Mul_5_output_0.out17_2_39_bfp.out25_82": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1073152 }, "/transformer_blocks.19/ff_context/net.2/Add_output_0.out17_3_164_bfp.out25_84": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1079296 }, "/transformer_blocks.19/Add_7_output_0.out10_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1080832 }, "/transformer_blocks.20/norm1/norm/LayerNormalization_output_0.out14_81_bfp.out15_81": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1083904 }, "/transformer_blocks.20/norm1_context/norm/LayerNormalization_output_0.out14_80_bfp.out15_80": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1085440 }, "/transformer_blocks.20/norm1/Add_2_output_0.out0_0_81_bfp.out1_50": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1086976 }, "/transformer_blocks.20/norm1_context/Add_2_output_0.out0_0_80_bfp.out1_52": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1088512 }, "/transformer_blocks.20/attn/Concat_output_0.out22_20_bfp.out23_60": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 1090048 }, "/transformer_blocks.20/attn/Concat_1_output_0.out22_20_bfp.out23_61": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 1091584 }, "/transformer_blocks.20/attn/Concat_2_output_0.out22_20_bfp.out23_62": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 1093120 }, "/transformer_blocks.20/attn/Reshape_3_output_0.out22_20_bfp.out27_0_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 1094656 }, "/transformer_blocks.20/attn/to_add_out/Add_output_0.out6_1_41_bfp.out7_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1096192 }, "/transformer_blocks.20/attn/to_out.0/Add_output_0.out6_1_40_bfp.out7_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1097728 }, "/transformer_blocks.20/Add_output_0.out10_50": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1099264 }, "/transformer_blocks.20/norm2/LayerNormalization_output_0.out14_82_bfp.out15_82": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1102336 }, "/transformer_blocks.20/Add_2_output_0.out0_0_82_bfp.out1_51": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1103872 }, "/transformer_blocks.20/ff/net.0/Mul_5_output_0.out17_2_40_bfp.out25_85": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1105408 }, "/transformer_blocks.20/ff/net.2/Add_output_0.out17_3_171_bfp.out25_87": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1111552 }, "/transformer_blocks.20/Add_3_output_0.out10_51": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1113088 }, "/transformer_blocks.20/Add_4_output_0.out10_52": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1116160 }, "/transformer_blocks.20/norm2_context/LayerNormalization_output_0.out14_83_bfp.out15_83": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1119232 }, "/transformer_blocks.20/Add_6_output_0.out0_0_83_bfp.out1_53": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1120768 }, "/transformer_blocks.20/ff_context/net.0/Mul_5_output_0.out17_2_41_bfp.out25_86": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1122304 }, "/transformer_blocks.20/ff_context/net.2/Add_output_0.out17_3_172_bfp.out25_88": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1128448 }, "/transformer_blocks.20/Add_7_output_0.out10_53": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1129984 }, "/transformer_blocks.21/norm1_context/norm/LayerNormalization_output_0.out14_84_bfp.out15_84": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1133056 }, "/Add_20_output_0.out_35_1_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1134592 }, "/transformer_blocks.21/norm1/norm/LayerNormalization_output_0.out14_85_bfp.out15_85": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1137664 }, "/transformer_blocks.21/norm1/Add_2_output_0.out0_0_85_bfp.out1_54": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1139200 }, "/transformer_blocks.21/norm1_context/Add_2_output_0.out0_0_84_bfp.out1_56": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1140736 }, "/transformer_blocks.21/attn/Concat_output_0.out22_21_bfp.out23_63": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 1142272 }, "/transformer_blocks.21/attn/Concat_1_output_0.out22_21_bfp.out23_64": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 1143808 }, "/transformer_blocks.21/attn/Concat_2_output_0.out22_21_bfp.out23_65": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 1145344 }, "/transformer_blocks.21/attn/Reshape_3_output_0.out22_21_bfp.out27_0_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 1146880 }, "/transformer_blocks.21/attn/to_out.0/Add_output_0.out6_1_42_bfp.out7_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1148416 }, "/transformer_blocks.21/Add_output_0.out10_54": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1149952 }, "/transformer_blocks.21/norm2/LayerNormalization_output_0.out14_86_bfp.out15_86": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1153024 }, "/transformer_blocks.21/Add_2_output_0.out0_0_86_bfp.out1_55": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1154560 }, "/transformer_blocks.21/ff/net.0/Mul_5_output_0.out17_2_42_bfp.out25_89": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1156096 }, "/transformer_blocks.21/ff/net.2/Add_output_0.out17_3_179_bfp.out25_91": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1162240 }, "/transformer_blocks.21/Add_3_output_0.out10_55": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1163776 }, "/Add_21_output_0.out_35_1_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1166848 }, "/transformer_blocks.21/attn/to_add_out/Add_output_0.out6_1_43_bfp.out7_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1169920 }, "/transformer_blocks.21/Add_4_output_0.out10_56": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1171456 }, "/transformer_blocks.21/norm2_context/LayerNormalization_output_0.out14_87_bfp.out15_87": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1174528 }, "/transformer_blocks.21/Add_6_output_0.out0_0_87_bfp.out1_57": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1176064 }, "/transformer_blocks.21/ff_context/net.0/Mul_5_output_0.out17_2_43_bfp.out25_90": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1177600 }, "/transformer_blocks.21/ff_context/net.2/Add_output_0.out17_3_180_bfp.out25_92": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1183744 }, "/transformer_blocks.21/Add_7_output_0.out10_57": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1185280 }, "/transformer_blocks.22/norm1/norm/LayerNormalization_output_0.out14_89_bfp.out15_89": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1188352 }, "/transformer_blocks.22/norm1/Add_2_output_0.out0_0_89_bfp.out1_58": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1189888 }, "/transformer_blocks.22/norm1_context/norm/LayerNormalization_output_0.out14_88_bfp.out15_88": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1191424 }, "/transformer_blocks.22/norm1_context/Add_2_output_0.out0_0_88_bfp.out1_60": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1192960 }, "/transformer_blocks.22/attn/Concat_output_0.out22_22_bfp.out23_66": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 1194496 }, "/transformer_blocks.22/attn/Concat_1_output_0.out22_22_bfp.out23_67": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 1196032 }, "/transformer_blocks.22/attn/Concat_2_output_0.out22_22_bfp.out23_68": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 1197568 }, "/transformer_blocks.22/attn/Reshape_3_output_0.out22_22_bfp.out27_0_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 1199104 }, "/transformer_blocks.22/attn/to_out.0/Add_output_0.out6_1_44_bfp.out7_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1200640 }, "/transformer_blocks.22/Add_output_0.out10_58": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1202176 }, "/transformer_blocks.22/norm2/LayerNormalization_output_0.out14_90_bfp.out15_90": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1205248 }, "/transformer_blocks.22/Add_2_output_0.out0_0_90_bfp.out1_59": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1206784 }, "/transformer_blocks.22/ff/net.0/Mul_5_output_0.out17_2_44_bfp.out25_93": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1208320 }, "/transformer_blocks.22/ff/net.2/Add_output_0.out17_3_187_bfp.out25_95": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1214464 }, "/transformer_blocks.22/Add_3_output_0.out10_59": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1216000 }, "/transformer_blocks.22/attn/to_add_out/Add_output_0.out6_1_45_bfp.out7_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1219072 }, "/transformer_blocks.22/Add_4_output_0.out10_60": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1220608 }, "/transformer_blocks.22/norm2_context/LayerNormalization_output_0.out14_91_bfp.out15_91": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1223680 }, "/transformer_blocks.22/Add_6_output_0.out0_0_91_bfp.out1_61": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1225216 }, "/transformer_blocks.22/ff_context/net.0/Mul_5_output_0.out17_2_45_bfp.out25_94": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1226752 }, "/transformer_blocks.22/ff_context/net.2/Add_output_0.out17_3_188_bfp.out25_96": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1232896 }, "/transformer_blocks.22/Add_7_output_0.out10_61": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1234432 }, "/transformer_blocks.23/norm1_context/norm/LayerNormalization_output_0.out14_92_bfp.out15_92": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1237504 }, "/Add_22_output_0.out_35_1_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1239040 }, "/transformer_blocks.23/norm1/norm/LayerNormalization_output_0.out14_93_bfp.out15_93": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1242112 }, "/transformer_blocks.23/norm1/Add_2_output_0.out0_0_93_bfp.out1_62": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1243648 }, "/transformer_blocks.23/norm1_context/Add_2_output_0.out0_0_92_bfp.out1_64": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 1245184 }, "/transformer_blocks.23/attn/Concat_output_0.out22_23_bfp.out23_69": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 1246720 }, "/transformer_blocks.23/attn/Concat_1_output_0.out22_23_bfp.out23_70": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 1248256 }, "/transformer_blocks.23/attn/Concat_2_output_0.out22_23_bfp.out23_71": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + state_dim1", "False" ], "offset": 1249792 }, "/transformer_blocks.23/attn/Reshape_3_output_0.out22_23_bfp.out27_0_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + state_dim1", "False" ], "offset": 1251328 }, "/transformer_blocks.23/attn/to_out.0/Add_output_0.out6_1_46_bfp.out7_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1252864 }, "/transformer_blocks.23/Add_output_0.out10_62": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1254400 }, "/transformer_blocks.23/norm2/LayerNormalization_output_0.out14_94_bfp.out15_94": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1257472 }, "/transformer_blocks.23/Add_2_output_0.out0_0_94_bfp.out1_63": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1259008 }, "/transformer_blocks.23/ff/net.0/Mul_5_output_0.out17_2_46_bfp.out25_97": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1260544 }, "/transformer_blocks.23/ff/net.2/Add_output_0.out17_3_195_bfp.out25_98": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1266688 }, "/transformer_blocks.23/Add_3_output_0.out10_63": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1268224 }, "/norm_out/norm/LayerNormalization_output_0.out14_95_bfp.out15_95": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1271296 }, "/norm_out/Add_2_output_0.out0_0_95_bfp.out1_93": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1272832 }, "/norm_out/Add_2_output_0.out0_0_95": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "state_dim1", "False" ], "offset": 1274368 }, "existing_model.pos_embed.proj.weight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 135168 ], "size_in_bytes": 135168, "op_tensor_size": 135168, "offset": 0, "file_name": "cache/pos_embedprojConv_0.const", "file_size": 135168 }, "existing_model.time_text_embed.timestep_embedder.linear_1.weight_5_1_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 448512 ], "size_in_bytes": 448512, "op_tensor_size": 448512, "offset": 135168, "file_name": "cache/pos_embedprojConv_1.const", "file_size": 448512 }, "/time_text_embed/timestep_embedder/act/Sigmoid.weights2_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 128 ], "size_in_bytes": 256, "op_tensor_size": 256, "offset": 583680, "file_name": "cache/pos_embedprojConv_2.const", "file_size": 256 }, "existing_model.time_text_embed.timestep_embedder.linear_2.weight_5_1_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 583936, "file_name": "cache/pos_embedprojConv_3.const", "file_size": 2691072 }, "existing_model.time_text_embed.text_embedder.linear_1.weight_5_1_2": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 3588096 ], "size_in_bytes": 3588096, "op_tensor_size": 3588096, "offset": 3275008, "file_name": "cache/pos_embedprojConv_4.const", "file_size": 3588096 }, "/time_text_embed/text_embedder/act_1/Sigmoid.weights2_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 128 ], "size_in_bytes": 256, "op_tensor_size": 256, "offset": 6863104, "file_name": "cache/pos_embedprojConv_5.const", "file_size": 256 }, "existing_model.time_text_embed.text_embedder.linear_2.weight_5_1_3": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 6863360, "file_name": "cache/pos_embedprojConv_6.const", "file_size": 2691072 }, "/transformer_blocks.0/norm1/silu/Sigmoid.weights2_2": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 128 ], "size_in_bytes": 256, "op_tensor_size": 256, "offset": 9554432, "file_name": "cache/pos_embedprojConv_7.const", "file_size": 256 }, "encoder_hidden_states.out17_3_0_bfp.wts": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 64 ], "size_in_bytes": 128, "op_tensor_size": 128, "offset": 9554688, "file_name": "cache/pos_embedprojConv_8.const", "file_size": 128 }, "onnx::MatMul_7905": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 7274496 ], "size_in_bytes": 7274496, "op_tensor_size": 7274496, "offset": 9554816, "file_name": "cache/pos_embedprojConv_9.const", "file_size": 7274496 }, "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_bfp.wts": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 64 ], "size_in_bytes": 128, "op_tensor_size": 128, "offset": 16829312, "file_name": "cache/pos_embedprojConv_10.const", "file_size": 128 }, "/transformer_blocks.0/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 16829440, "file_name": "cache/pos_embedprojConv_11.const", "file_size": 3072 }, "/transformer_blocks.0/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 16832512, "file_name": "cache/pos_embedprojConv_12.const", "file_size": 3072 }, "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_0_existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 16835584, "file_name": "cache/pos_embedprojConv_13.const", "file_size": 5382144 }, "/transformer_blocks.0/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 22217728, "file_name": "cache/pos_embedprojConv_14.const", "file_size": 3072 }, "/transformer_blocks.0/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 22220800, "file_name": "cache/pos_embedprojConv_15.const", "file_size": 3072 }, "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_0_existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 22223872, "file_name": "cache/pos_embedprojConv_16.const", "file_size": 5382144 }, "onnx::MatMul_7909_onnx::MatMul_7906": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 27606016, "file_name": "cache/pos_embedprojConv_17.const", "file_size": 5382144 }, "onnx::MatMul_7910_onnx::MatMul_7907": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 32988160, "file_name": "cache/pos_embedprojConv_18.const", "file_size": 5382144 }, "onnx::MatMul_7911_onnx::MatMul_7908": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 38370304, "file_name": "cache/pos_embedprojConv_19.const", "file_size": 5382144 }, "onnx::MatMul_7926": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 43752448, "file_name": "cache/pos_embedprojConv_20.const", "file_size": 2691072 }, "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 46443520, "file_name": "cache/pos_embedprojConv_21.const", "file_size": 2691072 }, "/transformer_blocks.0/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 49134592, "file_name": "cache/pos_embedprojConv_22.const", "file_size": 3072 }, "/transformer_blocks.0/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 49137664, "file_name": "cache/pos_embedprojConv_23.const", "file_size": 3072 }, "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_3_existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 49140736, "file_name": "cache/pos_embedprojConv_24.const", "file_size": 5382144 }, "onnx::MatMul_7929": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 54522880, "file_name": "cache/pos_embedprojConv_25.const", "file_size": 10764288 }, "onnx::MatMul_7930": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 65287168, "file_name": "cache/pos_embedprojConv_26.const", "file_size": 10764288 }, "existing_model.transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 76051456, "file_name": "cache/pos_embedprojConv_27.const", "file_size": 2691072 }, "onnx::MatMul_7925": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 78742528, "file_name": "cache/pos_embedprojConv_28.const", "file_size": 2691072 }, "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 81433600, "file_name": "cache/pos_embedprojConv_29.const", "file_size": 2691072 }, "/transformer_blocks.0/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 84124672, "file_name": "cache/pos_embedprojConv_30.const", "file_size": 3072 }, "/transformer_blocks.0/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 84127744, "file_name": "cache/pos_embedprojConv_31.const", "file_size": 3072 }, "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_3_existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 84130816, "file_name": "cache/pos_embedprojConv_32.const", "file_size": 5382144 }, "onnx::MatMul_7927": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 89512960, "file_name": "cache/pos_embedprojConv_33.const", "file_size": 10764288 }, "onnx::MatMul_7928": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 100277248, "file_name": "cache/pos_embedprojConv_34.const", "file_size": 10764288 }, "existing_model.transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 111041536, "file_name": "cache/pos_embedprojConv_35.const", "file_size": 2691072 }, "/transformer_blocks.1/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 113732608, "file_name": "cache/pos_embedprojConv_36.const", "file_size": 3072 }, "/transformer_blocks.1/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 113735680, "file_name": "cache/pos_embedprojConv_37.const", "file_size": 3072 }, "/transformer_blocks.1/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 113738752, "file_name": "cache/pos_embedprojConv_38.const", "file_size": 3072 }, "/transformer_blocks.1/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 113741824, "file_name": "cache/pos_embedprojConv_39.const", "file_size": 3072 }, "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_0_existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 113744896, "file_name": "cache/pos_embedprojConv_40.const", "file_size": 5382144 }, "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_0_existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 119127040, "file_name": "cache/pos_embedprojConv_41.const", "file_size": 5382144 }, "onnx::MatMul_7934_onnx::MatMul_7931": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 124509184, "file_name": "cache/pos_embedprojConv_42.const", "file_size": 5382144 }, "onnx::MatMul_7935_onnx::MatMul_7932": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 129891328, "file_name": "cache/pos_embedprojConv_43.const", "file_size": 5382144 }, "onnx::MatMul_7936_onnx::MatMul_7933": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 135273472, "file_name": "cache/pos_embedprojConv_44.const", "file_size": 5382144 }, "onnx::MatMul_7950": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 140655616, "file_name": "cache/pos_embedprojConv_45.const", "file_size": 2691072 }, "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 143346688, "file_name": "cache/pos_embedprojConv_46.const", "file_size": 2691072 }, "/transformer_blocks.1/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 146037760, "file_name": "cache/pos_embedprojConv_47.const", "file_size": 3072 }, "/transformer_blocks.1/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 146040832, "file_name": "cache/pos_embedprojConv_48.const", "file_size": 3072 }, "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_3_existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 146043904, "file_name": "cache/pos_embedprojConv_49.const", "file_size": 5382144 }, "onnx::MatMul_7952": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 151426048, "file_name": "cache/pos_embedprojConv_50.const", "file_size": 10764288 }, "onnx::MatMul_7953": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 162190336, "file_name": "cache/pos_embedprojConv_51.const", "file_size": 10764288 }, "existing_model.transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 172954624, "file_name": "cache/pos_embedprojConv_52.const", "file_size": 2691072 }, "onnx::MatMul_7951": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 175645696, "file_name": "cache/pos_embedprojConv_53.const", "file_size": 2691072 }, "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 178336768, "file_name": "cache/pos_embedprojConv_54.const", "file_size": 2691072 }, "/transformer_blocks.1/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 181027840, "file_name": "cache/pos_embedprojConv_55.const", "file_size": 3072 }, "/transformer_blocks.1/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 181030912, "file_name": "cache/pos_embedprojConv_56.const", "file_size": 3072 }, "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_3_existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 181033984, "file_name": "cache/pos_embedprojConv_57.const", "file_size": 5382144 }, "onnx::MatMul_7954": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 186416128, "file_name": "cache/pos_embedprojConv_58.const", "file_size": 10764288 }, "onnx::MatMul_7955": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 197180416, "file_name": "cache/pos_embedprojConv_59.const", "file_size": 10764288 }, "existing_model.transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 207944704, "file_name": "cache/pos_embedprojConv_60.const", "file_size": 2691072 }, "/transformer_blocks.2/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 210635776, "file_name": "cache/pos_embedprojConv_61.const", "file_size": 3072 }, "/transformer_blocks.2/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 210638848, "file_name": "cache/pos_embedprojConv_62.const", "file_size": 3072 }, "/transformer_blocks.2/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 210641920, "file_name": "cache/pos_embedprojConv_63.const", "file_size": 3072 }, "/transformer_blocks.2/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 210644992, "file_name": "cache/pos_embedprojConv_64.const", "file_size": 3072 }, "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_0_existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 210648064, "file_name": "cache/pos_embedprojConv_65.const", "file_size": 5382144 }, "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_0_existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 216030208, "file_name": "cache/pos_embedprojConv_66.const", "file_size": 5382144 }, "onnx::MatMul_7959_onnx::MatMul_7956": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 221412352, "file_name": "cache/pos_embedprojConv_67.const", "file_size": 5382144 }, "onnx::MatMul_7960_onnx::MatMul_7957": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 226794496, "file_name": "cache/pos_embedprojConv_68.const", "file_size": 5382144 }, "onnx::MatMul_7961_onnx::MatMul_7958": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 232176640, "file_name": "cache/pos_embedprojConv_69.const", "file_size": 5382144 }, "onnx::MatMul_7976": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 237558784, "file_name": "cache/pos_embedprojConv_70.const", "file_size": 2691072 }, "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 240249856, "file_name": "cache/pos_embedprojConv_71.const", "file_size": 2691072 }, "onnx::MatMul_7975": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 242940928, "file_name": "cache/pos_embedprojConv_72.const", "file_size": 2691072 }, "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 245632000, "file_name": "cache/pos_embedprojConv_73.const", "file_size": 2691072 }, "/transformer_blocks.2/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 248323072, "file_name": "cache/pos_embedprojConv_74.const", "file_size": 3072 }, "/transformer_blocks.2/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 248326144, "file_name": "cache/pos_embedprojConv_75.const", "file_size": 3072 }, "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_3_existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 248329216, "file_name": "cache/pos_embedprojConv_76.const", "file_size": 5382144 }, "onnx::MatMul_7977": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 253711360, "file_name": "cache/pos_embedprojConv_77.const", "file_size": 10764288 }, "onnx::MatMul_7978": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 264475648, "file_name": "cache/pos_embedprojConv_78.const", "file_size": 10764288 }, "/transformer_blocks.2/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 275239936, "file_name": "cache/pos_embedprojConv_79.const", "file_size": 3072 }, "/transformer_blocks.2/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 275243008, "file_name": "cache/pos_embedprojConv_80.const", "file_size": 3072 }, "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_3_existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 275246080, "file_name": "cache/pos_embedprojConv_81.const", "file_size": 5382144 }, "onnx::MatMul_7979": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 280628224, "file_name": "cache/pos_embedprojConv_82.const", "file_size": 10764288 }, "onnx::MatMul_7980": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 291392512, "file_name": "cache/pos_embedprojConv_83.const", "file_size": 10764288 }, "existing_model.transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 302156800, "file_name": "cache/pos_embedprojConv_84.const", "file_size": 2691072 }, "/transformer_blocks.3/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 304847872, "file_name": "cache/pos_embedprojConv_85.const", "file_size": 3072 }, "/transformer_blocks.3/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 304850944, "file_name": "cache/pos_embedprojConv_86.const", "file_size": 3072 }, "existing_model.transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 304854016, "file_name": "cache/pos_embedprojConv_87.const", "file_size": 2691072 }, "/transformer_blocks.3/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 307545088, "file_name": "cache/pos_embedprojConv_88.const", "file_size": 3072 }, "/transformer_blocks.3/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 307548160, "file_name": "cache/pos_embedprojConv_89.const", "file_size": 3072 }, "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_0_existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 307551232, "file_name": "cache/pos_embedprojConv_90.const", "file_size": 5382144 }, "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_0_existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 312933376, "file_name": "cache/pos_embedprojConv_91.const", "file_size": 5382144 }, "onnx::MatMul_7984_onnx::MatMul_7981": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 318315520, "file_name": "cache/pos_embedprojConv_92.const", "file_size": 5382144 }, "onnx::MatMul_7985_onnx::MatMul_7982": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 323697664, "file_name": "cache/pos_embedprojConv_93.const", "file_size": 5382144 }, "onnx::MatMul_7986_onnx::MatMul_7983": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 329079808, "file_name": "cache/pos_embedprojConv_94.const", "file_size": 5382144 }, "onnx::MatMul_8001": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 334461952, "file_name": "cache/pos_embedprojConv_95.const", "file_size": 2691072 }, "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 337153024, "file_name": "cache/pos_embedprojConv_96.const", "file_size": 2691072 }, "onnx::MatMul_8000": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 339844096, "file_name": "cache/pos_embedprojConv_97.const", "file_size": 2691072 }, "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 342535168, "file_name": "cache/pos_embedprojConv_98.const", "file_size": 2691072 }, "/transformer_blocks.3/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 345226240, "file_name": "cache/pos_embedprojConv_99.const", "file_size": 3072 }, "/transformer_blocks.3/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 345229312, "file_name": "cache/pos_embedprojConv_100.const", "file_size": 3072 }, "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_3_existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 345232384, "file_name": "cache/pos_embedprojConv_101.const", "file_size": 5382144 }, "onnx::MatMul_8002": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 350614528, "file_name": "cache/pos_embedprojConv_102.const", "file_size": 10764288 }, "onnx::MatMul_8003": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 361378816, "file_name": "cache/pos_embedprojConv_103.const", "file_size": 10764288 }, "existing_model.transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 372143104, "file_name": "cache/pos_embedprojConv_104.const", "file_size": 2691072 }, "/transformer_blocks.3/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 374834176, "file_name": "cache/pos_embedprojConv_105.const", "file_size": 3072 }, "/transformer_blocks.3/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 374837248, "file_name": "cache/pos_embedprojConv_106.const", "file_size": 3072 }, "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_3_existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 374840320, "file_name": "cache/pos_embedprojConv_107.const", "file_size": 5382144 }, "onnx::MatMul_8004": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 380222464, "file_name": "cache/pos_embedprojConv_108.const", "file_size": 10764288 }, "onnx::MatMul_8005": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 390986752, "file_name": "cache/pos_embedprojConv_109.const", "file_size": 10764288 }, "existing_model.transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 401751040, "file_name": "cache/pos_embedprojConv_110.const", "file_size": 2691072 }, "/transformer_blocks.4/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 404442112, "file_name": "cache/pos_embedprojConv_111.const", "file_size": 3072 }, "/transformer_blocks.4/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 404445184, "file_name": "cache/pos_embedprojConv_112.const", "file_size": 3072 }, "/transformer_blocks.4/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 404448256, "file_name": "cache/pos_embedprojConv_113.const", "file_size": 3072 }, "/transformer_blocks.4/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 404451328, "file_name": "cache/pos_embedprojConv_114.const", "file_size": 3072 }, "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_0_existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 404454400, "file_name": "cache/pos_embedprojConv_115.const", "file_size": 5382144 }, "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_0_existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 409836544, "file_name": "cache/pos_embedprojConv_116.const", "file_size": 5382144 }, "onnx::MatMul_8009_onnx::MatMul_8006": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 415218688, "file_name": "cache/pos_embedprojConv_117.const", "file_size": 5382144 }, "onnx::MatMul_8010_onnx::MatMul_8007": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 420600832, "file_name": "cache/pos_embedprojConv_118.const", "file_size": 5382144 }, "onnx::MatMul_8011_onnx::MatMul_8008": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 425982976, "file_name": "cache/pos_embedprojConv_119.const", "file_size": 5382144 }, "onnx::MatMul_8026": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 431365120, "file_name": "cache/pos_embedprojConv_120.const", "file_size": 2691072 }, "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 434056192, "file_name": "cache/pos_embedprojConv_121.const", "file_size": 2691072 }, "onnx::MatMul_8025": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 436747264, "file_name": "cache/pos_embedprojConv_122.const", "file_size": 2691072 }, "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 439438336, "file_name": "cache/pos_embedprojConv_123.const", "file_size": 2691072 }, "/transformer_blocks.4/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 442129408, "file_name": "cache/pos_embedprojConv_124.const", "file_size": 3072 }, "/transformer_blocks.4/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 442132480, "file_name": "cache/pos_embedprojConv_125.const", "file_size": 3072 }, "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_3_existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 442135552, "file_name": "cache/pos_embedprojConv_126.const", "file_size": 5382144 }, "onnx::MatMul_8027": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 447517696, "file_name": "cache/pos_embedprojConv_127.const", "file_size": 10764288 }, "onnx::MatMul_8028": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 458281984, "file_name": "cache/pos_embedprojConv_128.const", "file_size": 10764288 }, "/transformer_blocks.4/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 469046272, "file_name": "cache/pos_embedprojConv_129.const", "file_size": 3072 }, "/transformer_blocks.4/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 469049344, "file_name": "cache/pos_embedprojConv_130.const", "file_size": 3072 }, "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_3_existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 469052416, "file_name": "cache/pos_embedprojConv_131.const", "file_size": 5382144 }, "onnx::MatMul_8029": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 474434560, "file_name": "cache/pos_embedprojConv_132.const", "file_size": 10764288 }, "onnx::MatMul_8030": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 485198848, "file_name": "cache/pos_embedprojConv_133.const", "file_size": 10764288 }, "existing_model.transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 495963136, "file_name": "cache/pos_embedprojConv_134.const", "file_size": 2691072 }, "/transformer_blocks.5/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 498654208, "file_name": "cache/pos_embedprojConv_135.const", "file_size": 3072 }, "/transformer_blocks.5/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 498657280, "file_name": "cache/pos_embedprojConv_136.const", "file_size": 3072 }, "existing_model.transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 498660352, "file_name": "cache/pos_embedprojConv_137.const", "file_size": 2691072 }, "/transformer_blocks.5/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 501351424, "file_name": "cache/pos_embedprojConv_138.const", "file_size": 3072 }, "/transformer_blocks.5/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 501354496, "file_name": "cache/pos_embedprojConv_139.const", "file_size": 3072 }, "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_0_existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 501357568, "file_name": "cache/pos_embedprojConv_140.const", "file_size": 5382144 }, "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_0_existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 506739712, "file_name": "cache/pos_embedprojConv_141.const", "file_size": 5382144 }, "onnx::MatMul_8034_onnx::MatMul_8031": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 512121856, "file_name": "cache/pos_embedprojConv_142.const", "file_size": 5382144 }, "onnx::MatMul_8035_onnx::MatMul_8032": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 517504000, "file_name": "cache/pos_embedprojConv_143.const", "file_size": 5382144 }, "onnx::MatMul_8036_onnx::MatMul_8033": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 522886144, "file_name": "cache/pos_embedprojConv_144.const", "file_size": 5382144 }, "onnx::MatMul_8051": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 528268288, "file_name": "cache/pos_embedprojConv_145.const", "file_size": 2691072 }, "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 530959360, "file_name": "cache/pos_embedprojConv_146.const", "file_size": 2691072 }, "onnx::MatMul_8050": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 533650432, "file_name": "cache/pos_embedprojConv_147.const", "file_size": 2691072 }, "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 536341504, "file_name": "cache/pos_embedprojConv_148.const", "file_size": 2691072 }, "/transformer_blocks.5/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 539032576, "file_name": "cache/pos_embedprojConv_149.const", "file_size": 3072 }, "/transformer_blocks.5/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 539035648, "file_name": "cache/pos_embedprojConv_150.const", "file_size": 3072 }, "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_3_existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 539038720, "file_name": "cache/pos_embedprojConv_151.const", "file_size": 5382144 }, "onnx::MatMul_8052": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 544420864, "file_name": "cache/pos_embedprojConv_152.const", "file_size": 10764288 }, "onnx::MatMul_8053": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 555185152, "file_name": "cache/pos_embedprojConv_153.const", "file_size": 10764288 }, "existing_model.transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 565949440, "file_name": "cache/pos_embedprojConv_154.const", "file_size": 2691072 }, "/transformer_blocks.5/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 568640512, "file_name": "cache/pos_embedprojConv_155.const", "file_size": 3072 }, "/transformer_blocks.5/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 568643584, "file_name": "cache/pos_embedprojConv_156.const", "file_size": 3072 }, "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_3_existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 568646656, "file_name": "cache/pos_embedprojConv_157.const", "file_size": 5382144 }, "onnx::MatMul_8054": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 574028800, "file_name": "cache/pos_embedprojConv_158.const", "file_size": 10764288 }, "onnx::MatMul_8055": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 584793088, "file_name": "cache/pos_embedprojConv_159.const", "file_size": 10764288 }, "existing_model.transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 595557376, "file_name": "cache/pos_embedprojConv_160.const", "file_size": 2691072 }, "/transformer_blocks.6/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 598248448, "file_name": "cache/pos_embedprojConv_161.const", "file_size": 3072 }, "/transformer_blocks.6/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 598251520, "file_name": "cache/pos_embedprojConv_162.const", "file_size": 3072 }, "/transformer_blocks.6/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 598254592, "file_name": "cache/pos_embedprojConv_163.const", "file_size": 3072 }, "/transformer_blocks.6/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 598257664, "file_name": "cache/pos_embedprojConv_164.const", "file_size": 3072 }, "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_0_existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 598260736, "file_name": "cache/pos_embedprojConv_165.const", "file_size": 5382144 }, "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_0_existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 603642880, "file_name": "cache/pos_embedprojConv_166.const", "file_size": 5382144 }, "onnx::MatMul_8059_onnx::MatMul_8056": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 609025024, "file_name": "cache/pos_embedprojConv_167.const", "file_size": 5382144 }, "onnx::MatMul_8060_onnx::MatMul_8057": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 614407168, "file_name": "cache/pos_embedprojConv_168.const", "file_size": 5382144 }, "onnx::MatMul_8061_onnx::MatMul_8058": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 619789312, "file_name": "cache/pos_embedprojConv_169.const", "file_size": 5382144 }, "onnx::MatMul_8076": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 625171456, "file_name": "cache/pos_embedprojConv_170.const", "file_size": 2691072 }, "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 627862528, "file_name": "cache/pos_embedprojConv_171.const", "file_size": 2691072 }, "onnx::MatMul_8075": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 630553600, "file_name": "cache/pos_embedprojConv_172.const", "file_size": 2691072 }, "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 633244672, "file_name": "cache/pos_embedprojConv_173.const", "file_size": 2691072 }, "/transformer_blocks.6/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 635935744, "file_name": "cache/pos_embedprojConv_174.const", "file_size": 3072 }, "/transformer_blocks.6/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 635938816, "file_name": "cache/pos_embedprojConv_175.const", "file_size": 3072 }, "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_3_existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 635941888, "file_name": "cache/pos_embedprojConv_176.const", "file_size": 5382144 }, "onnx::MatMul_8077": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 641324032, "file_name": "cache/pos_embedprojConv_177.const", "file_size": 10764288 }, "onnx::MatMul_8078": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 652088320, "file_name": "cache/pos_embedprojConv_178.const", "file_size": 10764288 }, "/transformer_blocks.6/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 662852608, "file_name": "cache/pos_embedprojConv_179.const", "file_size": 3072 }, "/transformer_blocks.6/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 662855680, "file_name": "cache/pos_embedprojConv_180.const", "file_size": 3072 }, "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_3_existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 662858752, "file_name": "cache/pos_embedprojConv_181.const", "file_size": 5382144 }, "onnx::MatMul_8079": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 668240896, "file_name": "cache/pos_embedprojConv_182.const", "file_size": 10764288 }, "onnx::MatMul_8080": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 679005184, "file_name": "cache/pos_embedprojConv_183.const", "file_size": 10764288 }, "existing_model.transformer_blocks.6.norm1_context.linear.weight_5_1_17_27_13_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 689769472, "file_name": "cache/pos_embedprojConv_184.const", "file_size": 2691072 }, "/transformer_blocks.7/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 692460544, "file_name": "cache/pos_embedprojConv_185.const", "file_size": 3072 }, "/transformer_blocks.7/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 692463616, "file_name": "cache/pos_embedprojConv_186.const", "file_size": 3072 }, "existing_model.transformer_blocks.6.norm1.linear.weight_5_1_16_27_12_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 692466688, "file_name": "cache/pos_embedprojConv_187.const", "file_size": 2691072 }, "/transformer_blocks.7/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 695157760, "file_name": "cache/pos_embedprojConv_188.const", "file_size": 3072 }, "/transformer_blocks.7/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 695160832, "file_name": "cache/pos_embedprojConv_189.const", "file_size": 3072 }, "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_0_existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 695163904, "file_name": "cache/pos_embedprojConv_190.const", "file_size": 5382144 }, "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_0_existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 700546048, "file_name": "cache/pos_embedprojConv_191.const", "file_size": 5382144 }, "onnx::MatMul_8084_onnx::MatMul_8081": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 705928192, "file_name": "cache/pos_embedprojConv_192.const", "file_size": 5382144 }, "onnx::MatMul_8085_onnx::MatMul_8082": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 711310336, "file_name": "cache/pos_embedprojConv_193.const", "file_size": 5382144 }, "onnx::MatMul_8086_onnx::MatMul_8083": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 716692480, "file_name": "cache/pos_embedprojConv_194.const", "file_size": 5382144 }, "onnx::MatMul_8101": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 722074624, "file_name": "cache/pos_embedprojConv_195.const", "file_size": 2691072 }, "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 724765696, "file_name": "cache/pos_embedprojConv_196.const", "file_size": 2691072 }, "onnx::MatMul_8100": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 727456768, "file_name": "cache/pos_embedprojConv_197.const", "file_size": 2691072 }, "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 730147840, "file_name": "cache/pos_embedprojConv_198.const", "file_size": 2691072 }, "/transformer_blocks.7/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 732838912, "file_name": "cache/pos_embedprojConv_199.const", "file_size": 3072 }, "/transformer_blocks.7/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 732841984, "file_name": "cache/pos_embedprojConv_200.const", "file_size": 3072 }, "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_3_existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 732845056, "file_name": "cache/pos_embedprojConv_201.const", "file_size": 5382144 }, "onnx::MatMul_8102": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 738227200, "file_name": "cache/pos_embedprojConv_202.const", "file_size": 10764288 }, "onnx::MatMul_8103": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 748991488, "file_name": "cache/pos_embedprojConv_203.const", "file_size": 10764288 }, "existing_model.transformer_blocks.7.norm1.linear.weight_5_1_18_27_14_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 759755776, "file_name": "cache/pos_embedprojConv_204.const", "file_size": 2691072 }, "/transformer_blocks.7/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 762446848, "file_name": "cache/pos_embedprojConv_205.const", "file_size": 3072 }, "/transformer_blocks.7/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 762449920, "file_name": "cache/pos_embedprojConv_206.const", "file_size": 3072 }, "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_3_existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 762452992, "file_name": "cache/pos_embedprojConv_207.const", "file_size": 5382144 }, "onnx::MatMul_8104": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 767835136, "file_name": "cache/pos_embedprojConv_208.const", "file_size": 10764288 }, "onnx::MatMul_8105": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 778599424, "file_name": "cache/pos_embedprojConv_209.const", "file_size": 10764288 }, "existing_model.transformer_blocks.7.norm1_context.linear.weight_5_1_19_27_15_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 789363712, "file_name": "cache/pos_embedprojConv_210.const", "file_size": 2691072 }, "/transformer_blocks.8/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 792054784, "file_name": "cache/pos_embedprojConv_211.const", "file_size": 3072 }, "/transformer_blocks.8/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 792057856, "file_name": "cache/pos_embedprojConv_212.const", "file_size": 3072 }, "/transformer_blocks.8/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 792060928, "file_name": "cache/pos_embedprojConv_213.const", "file_size": 3072 }, "/transformer_blocks.8/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 792064000, "file_name": "cache/pos_embedprojConv_214.const", "file_size": 3072 }, "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_0_existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 792067072, "file_name": "cache/pos_embedprojConv_215.const", "file_size": 5382144 }, "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_0_existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 797449216, "file_name": "cache/pos_embedprojConv_216.const", "file_size": 5382144 }, "onnx::MatMul_8109_onnx::MatMul_8106": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 802831360, "file_name": "cache/pos_embedprojConv_217.const", "file_size": 5382144 }, "onnx::MatMul_8110_onnx::MatMul_8107": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 808213504, "file_name": "cache/pos_embedprojConv_218.const", "file_size": 5382144 }, "onnx::MatMul_8111_onnx::MatMul_8108": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 813595648, "file_name": "cache/pos_embedprojConv_219.const", "file_size": 5382144 }, "onnx::MatMul_8126": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 818977792, "file_name": "cache/pos_embedprojConv_220.const", "file_size": 2691072 }, "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 821668864, "file_name": "cache/pos_embedprojConv_221.const", "file_size": 2691072 }, "onnx::MatMul_8125": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 824359936, "file_name": "cache/pos_embedprojConv_222.const", "file_size": 2691072 }, "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 827051008, "file_name": "cache/pos_embedprojConv_223.const", "file_size": 2691072 }, "/transformer_blocks.8/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 829742080, "file_name": "cache/pos_embedprojConv_224.const", "file_size": 3072 }, "/transformer_blocks.8/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 829745152, "file_name": "cache/pos_embedprojConv_225.const", "file_size": 3072 }, "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_3_existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 829748224, "file_name": "cache/pos_embedprojConv_226.const", "file_size": 5382144 }, "onnx::MatMul_8127": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 835130368, "file_name": "cache/pos_embedprojConv_227.const", "file_size": 10764288 }, "onnx::MatMul_8128": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 845894656, "file_name": "cache/pos_embedprojConv_228.const", "file_size": 10764288 }, "/transformer_blocks.8/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 856658944, "file_name": "cache/pos_embedprojConv_229.const", "file_size": 3072 }, "/transformer_blocks.8/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 856662016, "file_name": "cache/pos_embedprojConv_230.const", "file_size": 3072 }, "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_3_existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 856665088, "file_name": "cache/pos_embedprojConv_231.const", "file_size": 5382144 }, "onnx::MatMul_8129": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 862047232, "file_name": "cache/pos_embedprojConv_232.const", "file_size": 10764288 }, "onnx::MatMul_8130": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 872811520, "file_name": "cache/pos_embedprojConv_233.const", "file_size": 10764288 }, "existing_model.transformer_blocks.8.norm1_context.linear.weight_5_1_21_27_17_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 883575808, "file_name": "cache/pos_embedprojConv_234.const", "file_size": 2691072 }, "/transformer_blocks.9/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 886266880, "file_name": "cache/pos_embedprojConv_235.const", "file_size": 3072 }, "/transformer_blocks.9/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 886269952, "file_name": "cache/pos_embedprojConv_236.const", "file_size": 3072 }, "existing_model.transformer_blocks.8.norm1.linear.weight_5_1_20_27_16_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 886273024, "file_name": "cache/pos_embedprojConv_237.const", "file_size": 2691072 }, "/transformer_blocks.9/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 888964096, "file_name": "cache/pos_embedprojConv_238.const", "file_size": 3072 }, "/transformer_blocks.9/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 888967168, "file_name": "cache/pos_embedprojConv_239.const", "file_size": 3072 }, "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_0_existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 888970240, "file_name": "cache/pos_embedprojConv_240.const", "file_size": 5382144 }, "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_0_existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 894352384, "file_name": "cache/pos_embedprojConv_241.const", "file_size": 5382144 }, "onnx::MatMul_8134_onnx::MatMul_8131": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 899734528, "file_name": "cache/pos_embedprojConv_242.const", "file_size": 5382144 }, "onnx::MatMul_8135_onnx::MatMul_8132": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 905116672, "file_name": "cache/pos_embedprojConv_243.const", "file_size": 5382144 }, "onnx::MatMul_8136_onnx::MatMul_8133": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 910498816, "file_name": "cache/pos_embedprojConv_244.const", "file_size": 5382144 }, "onnx::MatMul_8151": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 915880960, "file_name": "cache/pos_embedprojConv_245.const", "file_size": 2691072 }, "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 918572032, "file_name": "cache/pos_embedprojConv_246.const", "file_size": 2691072 }, "onnx::MatMul_8150": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 921263104, "file_name": "cache/pos_embedprojConv_247.const", "file_size": 2691072 }, "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 923954176, "file_name": "cache/pos_embedprojConv_248.const", "file_size": 2691072 }, "/transformer_blocks.9/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 926645248, "file_name": "cache/pos_embedprojConv_249.const", "file_size": 3072 }, "/transformer_blocks.9/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 926648320, "file_name": "cache/pos_embedprojConv_250.const", "file_size": 3072 }, "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_3_existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 926651392, "file_name": "cache/pos_embedprojConv_251.const", "file_size": 5382144 }, "onnx::MatMul_8152": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 932033536, "file_name": "cache/pos_embedprojConv_252.const", "file_size": 10764288 }, "onnx::MatMul_8153": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 942797824, "file_name": "cache/pos_embedprojConv_253.const", "file_size": 10764288 }, "existing_model.transformer_blocks.9.norm1.linear.weight_5_1_22_27_18_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 953562112, "file_name": "cache/pos_embedprojConv_254.const", "file_size": 2691072 }, "/transformer_blocks.10/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 956253184, "file_name": "cache/pos_embedprojConv_255.const", "file_size": 3072 }, "/transformer_blocks.10/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 956256256, "file_name": "cache/pos_embedprojConv_256.const", "file_size": 3072 }, "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_0_existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 956259328, "file_name": "cache/pos_embedprojConv_257.const", "file_size": 5382144 }, "/transformer_blocks.9/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 961641472, "file_name": "cache/pos_embedprojConv_258.const", "file_size": 3072 }, "/transformer_blocks.9/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 961644544, "file_name": "cache/pos_embedprojConv_259.const", "file_size": 3072 }, "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_3_existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 961647616, "file_name": "cache/pos_embedprojConv_260.const", "file_size": 5382144 }, "onnx::MatMul_8154": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 967029760, "file_name": "cache/pos_embedprojConv_261.const", "file_size": 10764288 }, "onnx::MatMul_8155": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 977794048, "file_name": "cache/pos_embedprojConv_262.const", "file_size": 10764288 }, "existing_model.transformer_blocks.9.norm1_context.linear.weight_5_1_23_27_19_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 988558336, "file_name": "cache/pos_embedprojConv_263.const", "file_size": 2691072 }, "/transformer_blocks.10/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 991249408, "file_name": "cache/pos_embedprojConv_264.const", "file_size": 3072 }, "/transformer_blocks.10/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 991252480, "file_name": "cache/pos_embedprojConv_265.const", "file_size": 3072 }, "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_0_existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 991255552, "file_name": "cache/pos_embedprojConv_266.const", "file_size": 5382144 }, "onnx::MatMul_8159_onnx::MatMul_8156": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 996637696, "file_name": "cache/pos_embedprojConv_267.const", "file_size": 5382144 }, "onnx::MatMul_8160_onnx::MatMul_8157": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1002019840, "file_name": "cache/pos_embedprojConv_268.const", "file_size": 5382144 }, "onnx::MatMul_8161_onnx::MatMul_8158": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1007401984, "file_name": "cache/pos_embedprojConv_269.const", "file_size": 5382144 }, "onnx::MatMul_8175": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1012784128, "file_name": "cache/pos_embedprojConv_270.const", "file_size": 2691072 }, "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1015475200, "file_name": "cache/pos_embedprojConv_271.const", "file_size": 2691072 }, "/transformer_blocks.10/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1018166272, "file_name": "cache/pos_embedprojConv_272.const", "file_size": 3072 }, "/transformer_blocks.10/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1018169344, "file_name": "cache/pos_embedprojConv_273.const", "file_size": 3072 }, "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_3_existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1018172416, "file_name": "cache/pos_embedprojConv_274.const", "file_size": 5382144 }, "onnx::MatMul_8177": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1023554560, "file_name": "cache/pos_embedprojConv_275.const", "file_size": 10764288 }, "onnx::MatMul_8178": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1034318848, "file_name": "cache/pos_embedprojConv_276.const", "file_size": 10764288 }, "existing_model.transformer_blocks.10.norm1.linear.weight_5_1_24_27_20_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1045083136, "file_name": "cache/pos_embedprojConv_277.const", "file_size": 2691072 }, "onnx::MatMul_8176": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1047774208, "file_name": "cache/pos_embedprojConv_278.const", "file_size": 2691072 }, "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1050465280, "file_name": "cache/pos_embedprojConv_279.const", "file_size": 2691072 }, "/transformer_blocks.10/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1053156352, "file_name": "cache/pos_embedprojConv_280.const", "file_size": 3072 }, "/transformer_blocks.10/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1053159424, "file_name": "cache/pos_embedprojConv_281.const", "file_size": 3072 }, "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_3_existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1053162496, "file_name": "cache/pos_embedprojConv_282.const", "file_size": 5382144 }, "onnx::MatMul_8179": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1058544640, "file_name": "cache/pos_embedprojConv_283.const", "file_size": 10764288 }, "onnx::MatMul_8180": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1069308928, "file_name": "cache/pos_embedprojConv_284.const", "file_size": 10764288 }, "existing_model.transformer_blocks.10.norm1_context.linear.weight_5_1_25_27_21_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1080073216, "file_name": "cache/pos_embedprojConv_285.const", "file_size": 2691072 }, "/transformer_blocks.11/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1082764288, "file_name": "cache/pos_embedprojConv_286.const", "file_size": 3072 }, "/transformer_blocks.11/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1082767360, "file_name": "cache/pos_embedprojConv_287.const", "file_size": 3072 }, "/transformer_blocks.11/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1082770432, "file_name": "cache/pos_embedprojConv_288.const", "file_size": 3072 }, "/transformer_blocks.11/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1082773504, "file_name": "cache/pos_embedprojConv_289.const", "file_size": 3072 }, "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_0_existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1082776576, "file_name": "cache/pos_embedprojConv_290.const", "file_size": 5382144 }, "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_0_existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1088158720, "file_name": "cache/pos_embedprojConv_291.const", "file_size": 5382144 }, "onnx::MatMul_8184_onnx::MatMul_8181": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1093540864, "file_name": "cache/pos_embedprojConv_292.const", "file_size": 5382144 }, "onnx::MatMul_8185_onnx::MatMul_8182": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1098923008, "file_name": "cache/pos_embedprojConv_293.const", "file_size": 5382144 }, "onnx::MatMul_8186_onnx::MatMul_8183": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1104305152, "file_name": "cache/pos_embedprojConv_294.const", "file_size": 5382144 }, "onnx::MatMul_8200": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1109687296, "file_name": "cache/pos_embedprojConv_295.const", "file_size": 2691072 }, "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1112378368, "file_name": "cache/pos_embedprojConv_296.const", "file_size": 2691072 }, "/transformer_blocks.11/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1115069440, "file_name": "cache/pos_embedprojConv_297.const", "file_size": 3072 }, "/transformer_blocks.11/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1115072512, "file_name": "cache/pos_embedprojConv_298.const", "file_size": 3072 }, "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_3_existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1115075584, "file_name": "cache/pos_embedprojConv_299.const", "file_size": 5382144 }, "onnx::MatMul_8202": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1120457728, "file_name": "cache/pos_embedprojConv_300.const", "file_size": 10764288 }, "onnx::MatMul_8203": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1131222016, "file_name": "cache/pos_embedprojConv_301.const", "file_size": 10764288 }, "existing_model.transformer_blocks.11.norm1.linear.weight_5_1_26_27_22_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1141986304, "file_name": "cache/pos_embedprojConv_302.const", "file_size": 2691072 }, "onnx::MatMul_8201": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1144677376, "file_name": "cache/pos_embedprojConv_303.const", "file_size": 2691072 }, "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1147368448, "file_name": "cache/pos_embedprojConv_304.const", "file_size": 2691072 }, "/transformer_blocks.11/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1150059520, "file_name": "cache/pos_embedprojConv_305.const", "file_size": 3072 }, "/transformer_blocks.11/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1150062592, "file_name": "cache/pos_embedprojConv_306.const", "file_size": 3072 }, "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_3_existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1150065664, "file_name": "cache/pos_embedprojConv_307.const", "file_size": 5382144 }, "onnx::MatMul_8204": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1155447808, "file_name": "cache/pos_embedprojConv_308.const", "file_size": 10764288 }, "onnx::MatMul_8205": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1166212096, "file_name": "cache/pos_embedprojConv_309.const", "file_size": 10764288 }, "existing_model.transformer_blocks.11.norm1_context.linear.weight_5_1_27_27_23_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1176976384, "file_name": "cache/pos_embedprojConv_310.const", "file_size": 2691072 }, "/transformer_blocks.12/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1179667456, "file_name": "cache/pos_embedprojConv_311.const", "file_size": 3072 }, "/transformer_blocks.12/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1179670528, "file_name": "cache/pos_embedprojConv_312.const", "file_size": 3072 }, "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_0_existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1179673600, "file_name": "cache/pos_embedprojConv_313.const", "file_size": 5382144 }, "/transformer_blocks.12/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1185055744, "file_name": "cache/pos_embedprojConv_314.const", "file_size": 3072 }, "/transformer_blocks.12/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1185058816, "file_name": "cache/pos_embedprojConv_315.const", "file_size": 3072 }, "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_0_existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1185061888, "file_name": "cache/pos_embedprojConv_316.const", "file_size": 5382144 }, "onnx::MatMul_8209_onnx::MatMul_8206": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1190444032, "file_name": "cache/pos_embedprojConv_317.const", "file_size": 5382144 }, "onnx::MatMul_8210_onnx::MatMul_8207": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1195826176, "file_name": "cache/pos_embedprojConv_318.const", "file_size": 5382144 }, "onnx::MatMul_8211_onnx::MatMul_8208": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1201208320, "file_name": "cache/pos_embedprojConv_319.const", "file_size": 5382144 }, "onnx::MatMul_8225": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1206590464, "file_name": "cache/pos_embedprojConv_320.const", "file_size": 2691072 }, "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1209281536, "file_name": "cache/pos_embedprojConv_321.const", "file_size": 2691072 }, "/transformer_blocks.12/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1211972608, "file_name": "cache/pos_embedprojConv_322.const", "file_size": 3072 }, "/transformer_blocks.12/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1211975680, "file_name": "cache/pos_embedprojConv_323.const", "file_size": 3072 }, "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_3_existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1211978752, "file_name": "cache/pos_embedprojConv_324.const", "file_size": 5382144 }, "onnx::MatMul_8227": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1217360896, "file_name": "cache/pos_embedprojConv_325.const", "file_size": 10764288 }, "onnx::MatMul_8228": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1228125184, "file_name": "cache/pos_embedprojConv_326.const", "file_size": 10764288 }, "existing_model.transformer_blocks.12.norm1.linear.weight_5_1_28_27_24_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1238889472, "file_name": "cache/pos_embedprojConv_327.const", "file_size": 2691072 }, "onnx::MatMul_8226": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1241580544, "file_name": "cache/pos_embedprojConv_328.const", "file_size": 2691072 }, "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1244271616, "file_name": "cache/pos_embedprojConv_329.const", "file_size": 2691072 }, "/transformer_blocks.12/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1246962688, "file_name": "cache/pos_embedprojConv_330.const", "file_size": 3072 }, "/transformer_blocks.12/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1246965760, "file_name": "cache/pos_embedprojConv_331.const", "file_size": 3072 }, "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_3_existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1246968832, "file_name": "cache/pos_embedprojConv_332.const", "file_size": 5382144 }, "onnx::MatMul_8229": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1252350976, "file_name": "cache/pos_embedprojConv_333.const", "file_size": 10764288 }, "onnx::MatMul_8230": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1263115264, "file_name": "cache/pos_embedprojConv_334.const", "file_size": 10764288 }, "existing_model.transformer_blocks.12.norm1_context.linear.weight_5_1_29_27_25_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1273879552, "file_name": "cache/pos_embedprojConv_335.const", "file_size": 2691072 }, "/transformer_blocks.13/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1276570624, "file_name": "cache/pos_embedprojConv_336.const", "file_size": 3072 }, "/transformer_blocks.13/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1276573696, "file_name": "cache/pos_embedprojConv_337.const", "file_size": 3072 }, "/transformer_blocks.13/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1276576768, "file_name": "cache/pos_embedprojConv_338.const", "file_size": 3072 }, "/transformer_blocks.13/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1276579840, "file_name": "cache/pos_embedprojConv_339.const", "file_size": 3072 }, "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_0_existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1276582912, "file_name": "cache/pos_embedprojConv_340.const", "file_size": 5382144 }, "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_0_existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1281965056, "file_name": "cache/pos_embedprojConv_341.const", "file_size": 5382144 }, "onnx::MatMul_8234_onnx::MatMul_8231": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1287347200, "file_name": "cache/pos_embedprojConv_342.const", "file_size": 5382144 }, "onnx::MatMul_8235_onnx::MatMul_8232": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1292729344, "file_name": "cache/pos_embedprojConv_343.const", "file_size": 5382144 }, "onnx::MatMul_8236_onnx::MatMul_8233": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1298111488, "file_name": "cache/pos_embedprojConv_344.const", "file_size": 5382144 }, "onnx::MatMul_8250": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1303493632, "file_name": "cache/pos_embedprojConv_345.const", "file_size": 2691072 }, "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1306184704, "file_name": "cache/pos_embedprojConv_346.const", "file_size": 2691072 }, "/transformer_blocks.13/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1308875776, "file_name": "cache/pos_embedprojConv_347.const", "file_size": 3072 }, "/transformer_blocks.13/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1308878848, "file_name": "cache/pos_embedprojConv_348.const", "file_size": 3072 }, "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_3_existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1308881920, "file_name": "cache/pos_embedprojConv_349.const", "file_size": 5382144 }, "onnx::MatMul_8252": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1314264064, "file_name": "cache/pos_embedprojConv_350.const", "file_size": 10764288 }, "onnx::MatMul_8253": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1325028352, "file_name": "cache/pos_embedprojConv_351.const", "file_size": 10764288 }, "existing_model.transformer_blocks.13.norm1.linear.weight_5_1_30_27_26_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1335792640, "file_name": "cache/pos_embedprojConv_352.const", "file_size": 2691072 }, "onnx::MatMul_8251": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1338483712, "file_name": "cache/pos_embedprojConv_353.const", "file_size": 2691072 }, "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1341174784, "file_name": "cache/pos_embedprojConv_354.const", "file_size": 2691072 }, "/transformer_blocks.13/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1343865856, "file_name": "cache/pos_embedprojConv_355.const", "file_size": 3072 }, "/transformer_blocks.13/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1343868928, "file_name": "cache/pos_embedprojConv_356.const", "file_size": 3072 }, "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_3_existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1343872000, "file_name": "cache/pos_embedprojConv_357.const", "file_size": 5382144 }, "onnx::MatMul_8254": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1349254144, "file_name": "cache/pos_embedprojConv_358.const", "file_size": 10764288 }, "onnx::MatMul_8255": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1360018432, "file_name": "cache/pos_embedprojConv_359.const", "file_size": 10764288 }, "existing_model.transformer_blocks.13.norm1_context.linear.weight_5_1_31_27_27_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1370782720, "file_name": "cache/pos_embedprojConv_360.const", "file_size": 2691072 }, "/transformer_blocks.14/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1373473792, "file_name": "cache/pos_embedprojConv_361.const", "file_size": 3072 }, "/transformer_blocks.14/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1373476864, "file_name": "cache/pos_embedprojConv_362.const", "file_size": 3072 }, "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_0_existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1373479936, "file_name": "cache/pos_embedprojConv_363.const", "file_size": 5382144 }, "/transformer_blocks.14/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1378862080, "file_name": "cache/pos_embedprojConv_364.const", "file_size": 3072 }, "/transformer_blocks.14/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1378865152, "file_name": "cache/pos_embedprojConv_365.const", "file_size": 3072 }, "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_0_existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1378868224, "file_name": "cache/pos_embedprojConv_366.const", "file_size": 5382144 }, "onnx::MatMul_8259_onnx::MatMul_8256": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1384250368, "file_name": "cache/pos_embedprojConv_367.const", "file_size": 5382144 }, "onnx::MatMul_8260_onnx::MatMul_8257": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1389632512, "file_name": "cache/pos_embedprojConv_368.const", "file_size": 5382144 }, "onnx::MatMul_8261_onnx::MatMul_8258": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1395014656, "file_name": "cache/pos_embedprojConv_369.const", "file_size": 5382144 }, "onnx::MatMul_8275": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1400396800, "file_name": "cache/pos_embedprojConv_370.const", "file_size": 2691072 }, "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1403087872, "file_name": "cache/pos_embedprojConv_371.const", "file_size": 2691072 }, "/transformer_blocks.14/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1405778944, "file_name": "cache/pos_embedprojConv_372.const", "file_size": 3072 }, "/transformer_blocks.14/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1405782016, "file_name": "cache/pos_embedprojConv_373.const", "file_size": 3072 }, "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_3_existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1405785088, "file_name": "cache/pos_embedprojConv_374.const", "file_size": 5382144 }, "onnx::MatMul_8277": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1411167232, "file_name": "cache/pos_embedprojConv_375.const", "file_size": 10764288 }, "onnx::MatMul_8278": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1421931520, "file_name": "cache/pos_embedprojConv_376.const", "file_size": 10764288 }, "existing_model.transformer_blocks.14.norm1.linear.weight_5_1_32_27_28_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1432695808, "file_name": "cache/pos_embedprojConv_377.const", "file_size": 2691072 }, "onnx::MatMul_8276": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1435386880, "file_name": "cache/pos_embedprojConv_378.const", "file_size": 2691072 }, "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1438077952, "file_name": "cache/pos_embedprojConv_379.const", "file_size": 2691072 }, "/transformer_blocks.14/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1440769024, "file_name": "cache/pos_embedprojConv_380.const", "file_size": 3072 }, "/transformer_blocks.14/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1440772096, "file_name": "cache/pos_embedprojConv_381.const", "file_size": 3072 }, "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_3_existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1440775168, "file_name": "cache/pos_embedprojConv_382.const", "file_size": 5382144 }, "onnx::MatMul_8279": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1446157312, "file_name": "cache/pos_embedprojConv_383.const", "file_size": 10764288 }, "onnx::MatMul_8280": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1456921600, "file_name": "cache/pos_embedprojConv_384.const", "file_size": 10764288 }, "existing_model.transformer_blocks.14.norm1_context.linear.weight_5_1_33_27_29_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1467685888, "file_name": "cache/pos_embedprojConv_385.const", "file_size": 2691072 }, "/transformer_blocks.15/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1470376960, "file_name": "cache/pos_embedprojConv_386.const", "file_size": 3072 }, "/transformer_blocks.15/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1470380032, "file_name": "cache/pos_embedprojConv_387.const", "file_size": 3072 }, "/transformer_blocks.15/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1470383104, "file_name": "cache/pos_embedprojConv_388.const", "file_size": 3072 }, "/transformer_blocks.15/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1470386176, "file_name": "cache/pos_embedprojConv_389.const", "file_size": 3072 }, "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_0_existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1470389248, "file_name": "cache/pos_embedprojConv_390.const", "file_size": 5382144 }, "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_0_existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1475771392, "file_name": "cache/pos_embedprojConv_391.const", "file_size": 5382144 }, "onnx::MatMul_8284_onnx::MatMul_8281": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1481153536, "file_name": "cache/pos_embedprojConv_392.const", "file_size": 5382144 }, "onnx::MatMul_8285_onnx::MatMul_8282": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1486535680, "file_name": "cache/pos_embedprojConv_393.const", "file_size": 5382144 }, "onnx::MatMul_8286_onnx::MatMul_8283": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1491917824, "file_name": "cache/pos_embedprojConv_394.const", "file_size": 5382144 }, "onnx::MatMul_8300": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1497299968, "file_name": "cache/pos_embedprojConv_395.const", "file_size": 2691072 }, "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1499991040, "file_name": "cache/pos_embedprojConv_396.const", "file_size": 2691072 }, "/transformer_blocks.15/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1502682112, "file_name": "cache/pos_embedprojConv_397.const", "file_size": 3072 }, "/transformer_blocks.15/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1502685184, "file_name": "cache/pos_embedprojConv_398.const", "file_size": 3072 }, "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_3_existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1502688256, "file_name": "cache/pos_embedprojConv_399.const", "file_size": 5382144 }, "onnx::MatMul_8302": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1508070400, "file_name": "cache/pos_embedprojConv_400.const", "file_size": 10764288 }, "onnx::MatMul_8303": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1518834688, "file_name": "cache/pos_embedprojConv_401.const", "file_size": 10764288 }, "existing_model.transformer_blocks.15.norm1.linear.weight_5_1_34_27_30_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1529598976, "file_name": "cache/pos_embedprojConv_402.const", "file_size": 2691072 }, "onnx::MatMul_8301": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1532290048, "file_name": "cache/pos_embedprojConv_403.const", "file_size": 2691072 }, "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1534981120, "file_name": "cache/pos_embedprojConv_404.const", "file_size": 2691072 }, "/transformer_blocks.15/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1537672192, "file_name": "cache/pos_embedprojConv_405.const", "file_size": 3072 }, "/transformer_blocks.15/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1537675264, "file_name": "cache/pos_embedprojConv_406.const", "file_size": 3072 }, "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_3_existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1537678336, "file_name": "cache/pos_embedprojConv_407.const", "file_size": 5382144 }, "onnx::MatMul_8304": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1543060480, "file_name": "cache/pos_embedprojConv_408.const", "file_size": 10764288 }, "onnx::MatMul_8305": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1553824768, "file_name": "cache/pos_embedprojConv_409.const", "file_size": 10764288 }, "existing_model.transformer_blocks.15.norm1_context.linear.weight_5_1_35_27_31_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1564589056, "file_name": "cache/pos_embedprojConv_410.const", "file_size": 2691072 }, "/transformer_blocks.16/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1567280128, "file_name": "cache/pos_embedprojConv_411.const", "file_size": 3072 }, "/transformer_blocks.16/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1567283200, "file_name": "cache/pos_embedprojConv_412.const", "file_size": 3072 }, "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_0_existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1567286272, "file_name": "cache/pos_embedprojConv_413.const", "file_size": 5382144 }, "/transformer_blocks.16/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1572668416, "file_name": "cache/pos_embedprojConv_414.const", "file_size": 3072 }, "/transformer_blocks.16/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1572671488, "file_name": "cache/pos_embedprojConv_415.const", "file_size": 3072 }, "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_0_existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1572674560, "file_name": "cache/pos_embedprojConv_416.const", "file_size": 5382144 }, "onnx::MatMul_8309_onnx::MatMul_8306": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1578056704, "file_name": "cache/pos_embedprojConv_417.const", "file_size": 5382144 }, "onnx::MatMul_8310_onnx::MatMul_8307": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1583438848, "file_name": "cache/pos_embedprojConv_418.const", "file_size": 5382144 }, "onnx::MatMul_8311_onnx::MatMul_8308": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1588820992, "file_name": "cache/pos_embedprojConv_419.const", "file_size": 5382144 }, "onnx::MatMul_8325": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1594203136, "file_name": "cache/pos_embedprojConv_420.const", "file_size": 2691072 }, "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1596894208, "file_name": "cache/pos_embedprojConv_421.const", "file_size": 2691072 }, "/transformer_blocks.16/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1599585280, "file_name": "cache/pos_embedprojConv_422.const", "file_size": 3072 }, "/transformer_blocks.16/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1599588352, "file_name": "cache/pos_embedprojConv_423.const", "file_size": 3072 }, "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_3_existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1599591424, "file_name": "cache/pos_embedprojConv_424.const", "file_size": 5382144 }, "onnx::MatMul_8327": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1604973568, "file_name": "cache/pos_embedprojConv_425.const", "file_size": 10764288 }, "onnx::MatMul_8328": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1615737856, "file_name": "cache/pos_embedprojConv_426.const", "file_size": 10764288 }, "existing_model.transformer_blocks.16.norm1.linear.weight_5_1_36_27_32_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1626502144, "file_name": "cache/pos_embedprojConv_427.const", "file_size": 2691072 }, "onnx::MatMul_8326": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1629193216, "file_name": "cache/pos_embedprojConv_428.const", "file_size": 2691072 }, "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1631884288, "file_name": "cache/pos_embedprojConv_429.const", "file_size": 2691072 }, "/transformer_blocks.16/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1634575360, "file_name": "cache/pos_embedprojConv_430.const", "file_size": 3072 }, "/transformer_blocks.16/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1634578432, "file_name": "cache/pos_embedprojConv_431.const", "file_size": 3072 }, "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_3_existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1634581504, "file_name": "cache/pos_embedprojConv_432.const", "file_size": 5382144 }, "onnx::MatMul_8329": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1639963648, "file_name": "cache/pos_embedprojConv_433.const", "file_size": 10764288 }, "onnx::MatMul_8330": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1650727936, "file_name": "cache/pos_embedprojConv_434.const", "file_size": 10764288 }, "existing_model.transformer_blocks.16.norm1_context.linear.weight_5_1_37_27_33_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1661492224, "file_name": "cache/pos_embedprojConv_435.const", "file_size": 2691072 }, "/transformer_blocks.17/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1664183296, "file_name": "cache/pos_embedprojConv_436.const", "file_size": 3072 }, "/transformer_blocks.17/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1664186368, "file_name": "cache/pos_embedprojConv_437.const", "file_size": 3072 }, "/transformer_blocks.17/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1664189440, "file_name": "cache/pos_embedprojConv_438.const", "file_size": 3072 }, "/transformer_blocks.17/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1664192512, "file_name": "cache/pos_embedprojConv_439.const", "file_size": 3072 }, "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_0_existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1664195584, "file_name": "cache/pos_embedprojConv_440.const", "file_size": 5382144 }, "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_0_existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1669577728, "file_name": "cache/pos_embedprojConv_441.const", "file_size": 5382144 }, "onnx::MatMul_8334_onnx::MatMul_8331": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1674959872, "file_name": "cache/pos_embedprojConv_442.const", "file_size": 5382144 }, "onnx::MatMul_8335_onnx::MatMul_8332": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1680342016, "file_name": "cache/pos_embedprojConv_443.const", "file_size": 5382144 }, "onnx::MatMul_8336_onnx::MatMul_8333": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1685724160, "file_name": "cache/pos_embedprojConv_444.const", "file_size": 5382144 }, "onnx::MatMul_8350": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1691106304, "file_name": "cache/pos_embedprojConv_445.const", "file_size": 2691072 }, "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1693797376, "file_name": "cache/pos_embedprojConv_446.const", "file_size": 2691072 }, "/transformer_blocks.17/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1696488448, "file_name": "cache/pos_embedprojConv_447.const", "file_size": 3072 }, "/transformer_blocks.17/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1696491520, "file_name": "cache/pos_embedprojConv_448.const", "file_size": 3072 }, "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_3_existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1696494592, "file_name": "cache/pos_embedprojConv_449.const", "file_size": 5382144 }, "onnx::MatMul_8352": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1701876736, "file_name": "cache/pos_embedprojConv_450.const", "file_size": 10764288 }, "onnx::MatMul_8353": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1712641024, "file_name": "cache/pos_embedprojConv_451.const", "file_size": 10764288 }, "existing_model.transformer_blocks.17.norm1.linear.weight_5_1_38_27_34_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1723405312, "file_name": "cache/pos_embedprojConv_452.const", "file_size": 2691072 }, "onnx::MatMul_8351": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1726096384, "file_name": "cache/pos_embedprojConv_453.const", "file_size": 2691072 }, "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1728787456, "file_name": "cache/pos_embedprojConv_454.const", "file_size": 2691072 }, "/transformer_blocks.17/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1731478528, "file_name": "cache/pos_embedprojConv_455.const", "file_size": 3072 }, "/transformer_blocks.17/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1731481600, "file_name": "cache/pos_embedprojConv_456.const", "file_size": 3072 }, "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_3_existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1731484672, "file_name": "cache/pos_embedprojConv_457.const", "file_size": 5382144 }, "onnx::MatMul_8354": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1736866816, "file_name": "cache/pos_embedprojConv_458.const", "file_size": 10764288 }, "onnx::MatMul_8355": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1747631104, "file_name": "cache/pos_embedprojConv_459.const", "file_size": 10764288 }, "existing_model.transformer_blocks.17.norm1_context.linear.weight_5_1_39_27_35_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1758395392, "file_name": "cache/pos_embedprojConv_460.const", "file_size": 2691072 }, "/transformer_blocks.18/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1761086464, "file_name": "cache/pos_embedprojConv_461.const", "file_size": 3072 }, "/transformer_blocks.18/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1761089536, "file_name": "cache/pos_embedprojConv_462.const", "file_size": 3072 }, "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_0_existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1761092608, "file_name": "cache/pos_embedprojConv_463.const", "file_size": 5382144 }, "/transformer_blocks.18/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1766474752, "file_name": "cache/pos_embedprojConv_464.const", "file_size": 3072 }, "/transformer_blocks.18/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1766477824, "file_name": "cache/pos_embedprojConv_465.const", "file_size": 3072 }, "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_0_existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1766480896, "file_name": "cache/pos_embedprojConv_466.const", "file_size": 5382144 }, "onnx::MatMul_8359_onnx::MatMul_8356": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1771863040, "file_name": "cache/pos_embedprojConv_467.const", "file_size": 5382144 }, "onnx::MatMul_8360_onnx::MatMul_8357": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1777245184, "file_name": "cache/pos_embedprojConv_468.const", "file_size": 5382144 }, "onnx::MatMul_8361_onnx::MatMul_8358": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1782627328, "file_name": "cache/pos_embedprojConv_469.const", "file_size": 5382144 }, "onnx::MatMul_8375": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1788009472, "file_name": "cache/pos_embedprojConv_470.const", "file_size": 2691072 }, "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1790700544, "file_name": "cache/pos_embedprojConv_471.const", "file_size": 2691072 }, "/transformer_blocks.18/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1793391616, "file_name": "cache/pos_embedprojConv_472.const", "file_size": 3072 }, "/transformer_blocks.18/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1793394688, "file_name": "cache/pos_embedprojConv_473.const", "file_size": 3072 }, "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_3_existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1793397760, "file_name": "cache/pos_embedprojConv_474.const", "file_size": 5382144 }, "onnx::MatMul_8377": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1798779904, "file_name": "cache/pos_embedprojConv_475.const", "file_size": 10764288 }, "onnx::MatMul_8378": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1809544192, "file_name": "cache/pos_embedprojConv_476.const", "file_size": 10764288 }, "existing_model.transformer_blocks.18.norm1.linear.weight_5_1_40_27_36_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1820308480, "file_name": "cache/pos_embedprojConv_477.const", "file_size": 2691072 }, "onnx::MatMul_8376": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1822999552, "file_name": "cache/pos_embedprojConv_478.const", "file_size": 2691072 }, "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1825690624, "file_name": "cache/pos_embedprojConv_479.const", "file_size": 2691072 }, "/transformer_blocks.18/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1828381696, "file_name": "cache/pos_embedprojConv_480.const", "file_size": 3072 }, "/transformer_blocks.18/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1828384768, "file_name": "cache/pos_embedprojConv_481.const", "file_size": 3072 }, "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_3_existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1828387840, "file_name": "cache/pos_embedprojConv_482.const", "file_size": 5382144 }, "onnx::MatMul_8379": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1833769984, "file_name": "cache/pos_embedprojConv_483.const", "file_size": 10764288 }, "onnx::MatMul_8380": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1844534272, "file_name": "cache/pos_embedprojConv_484.const", "file_size": 10764288 }, "existing_model.transformer_blocks.18.norm1_context.linear.weight_5_1_41_27_37_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1855298560, "file_name": "cache/pos_embedprojConv_485.const", "file_size": 2691072 }, "/transformer_blocks.19/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1857989632, "file_name": "cache/pos_embedprojConv_486.const", "file_size": 3072 }, "/transformer_blocks.19/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1857992704, "file_name": "cache/pos_embedprojConv_487.const", "file_size": 3072 }, "/transformer_blocks.19/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1857995776, "file_name": "cache/pos_embedprojConv_488.const", "file_size": 3072 }, "/transformer_blocks.19/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1857998848, "file_name": "cache/pos_embedprojConv_489.const", "file_size": 3072 }, "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_0_existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1858001920, "file_name": "cache/pos_embedprojConv_490.const", "file_size": 5382144 }, "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_0_existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1863384064, "file_name": "cache/pos_embedprojConv_491.const", "file_size": 5382144 }, "onnx::MatMul_8384_onnx::MatMul_8381": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1868766208, "file_name": "cache/pos_embedprojConv_492.const", "file_size": 5382144 }, "onnx::MatMul_8385_onnx::MatMul_8382": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1874148352, "file_name": "cache/pos_embedprojConv_493.const", "file_size": 5382144 }, "onnx::MatMul_8386_onnx::MatMul_8383": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1879530496, "file_name": "cache/pos_embedprojConv_494.const", "file_size": 5382144 }, "onnx::MatMul_8400": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1884912640, "file_name": "cache/pos_embedprojConv_495.const", "file_size": 2691072 }, "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1887603712, "file_name": "cache/pos_embedprojConv_496.const", "file_size": 2691072 }, "/transformer_blocks.19/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1890294784, "file_name": "cache/pos_embedprojConv_497.const", "file_size": 3072 }, "/transformer_blocks.19/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1890297856, "file_name": "cache/pos_embedprojConv_498.const", "file_size": 3072 }, "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_3_existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1890300928, "file_name": "cache/pos_embedprojConv_499.const", "file_size": 5382144 }, "onnx::MatMul_8402": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1895683072, "file_name": "cache/pos_embedprojConv_500.const", "file_size": 10764288 }, "onnx::MatMul_8403": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1906447360, "file_name": "cache/pos_embedprojConv_501.const", "file_size": 10764288 }, "existing_model.transformer_blocks.19.norm1.linear.weight_5_1_42_27_38_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1917211648, "file_name": "cache/pos_embedprojConv_502.const", "file_size": 2691072 }, "onnx::MatMul_8401": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1919902720, "file_name": "cache/pos_embedprojConv_503.const", "file_size": 2691072 }, "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1922593792, "file_name": "cache/pos_embedprojConv_504.const", "file_size": 2691072 }, "/transformer_blocks.19/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1925284864, "file_name": "cache/pos_embedprojConv_505.const", "file_size": 3072 }, "/transformer_blocks.19/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1925287936, "file_name": "cache/pos_embedprojConv_506.const", "file_size": 3072 }, "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_3_existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1925291008, "file_name": "cache/pos_embedprojConv_507.const", "file_size": 5382144 }, "onnx::MatMul_8404": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1930673152, "file_name": "cache/pos_embedprojConv_508.const", "file_size": 10764288 }, "onnx::MatMul_8405": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1941437440, "file_name": "cache/pos_embedprojConv_509.const", "file_size": 10764288 }, "existing_model.transformer_blocks.19.norm1_context.linear.weight_5_1_43_27_39_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1952201728, "file_name": "cache/pos_embedprojConv_510.const", "file_size": 2691072 }, "/transformer_blocks.20/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1954892800, "file_name": "cache/pos_embedprojConv_511.const", "file_size": 3072 }, "/transformer_blocks.20/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1954895872, "file_name": "cache/pos_embedprojConv_512.const", "file_size": 3072 }, "/transformer_blocks.20/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1954898944, "file_name": "cache/pos_embedprojConv_513.const", "file_size": 3072 }, "/transformer_blocks.20/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1954902016, "file_name": "cache/pos_embedprojConv_514.const", "file_size": 3072 }, "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_0_existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1954905088, "file_name": "cache/pos_embedprojConv_515.const", "file_size": 5382144 }, "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_0_existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1960287232, "file_name": "cache/pos_embedprojConv_516.const", "file_size": 5382144 }, "onnx::MatMul_8409_onnx::MatMul_8406": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1965669376, "file_name": "cache/pos_embedprojConv_517.const", "file_size": 5382144 }, "onnx::MatMul_8410_onnx::MatMul_8407": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1971051520, "file_name": "cache/pos_embedprojConv_518.const", "file_size": 5382144 }, "onnx::MatMul_8411_onnx::MatMul_8408": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1976433664, "file_name": "cache/pos_embedprojConv_519.const", "file_size": 5382144 }, "onnx::MatMul_8426": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1981815808, "file_name": "cache/pos_embedprojConv_520.const", "file_size": 2691072 }, "onnx::MatMul_8425": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1984506880, "file_name": "cache/pos_embedprojConv_521.const", "file_size": 2691072 }, "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1987197952, "file_name": "cache/pos_embedprojConv_522.const", "file_size": 2691072 }, "/transformer_blocks.20/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1989889024, "file_name": "cache/pos_embedprojConv_523.const", "file_size": 3072 }, "/transformer_blocks.20/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1989892096, "file_name": "cache/pos_embedprojConv_524.const", "file_size": 3072 }, "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_3_existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1989895168, "file_name": "cache/pos_embedprojConv_525.const", "file_size": 5382144 }, "onnx::MatMul_8427": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1995277312, "file_name": "cache/pos_embedprojConv_526.const", "file_size": 10764288 }, "onnx::MatMul_8428": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2006041600, "file_name": "cache/pos_embedprojConv_527.const", "file_size": 10764288 }, "existing_model.transformer_blocks.20.norm1.linear.weight_5_1_44_27_40_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2016805888, "file_name": "cache/pos_embedprojConv_528.const", "file_size": 2691072 }, "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2019496960, "file_name": "cache/pos_embedprojConv_529.const", "file_size": 2691072 }, "/transformer_blocks.20/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2022188032, "file_name": "cache/pos_embedprojConv_530.const", "file_size": 3072 }, "/transformer_blocks.20/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2022191104, "file_name": "cache/pos_embedprojConv_531.const", "file_size": 3072 }, "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_3_existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2022194176, "file_name": "cache/pos_embedprojConv_532.const", "file_size": 5382144 }, "onnx::MatMul_8429": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2027576320, "file_name": "cache/pos_embedprojConv_533.const", "file_size": 10764288 }, "onnx::MatMul_8430": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2038340608, "file_name": "cache/pos_embedprojConv_534.const", "file_size": 10764288 }, "existing_model.transformer_blocks.20.norm1_context.linear.weight_5_1_45_27_41_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2049104896, "file_name": "cache/pos_embedprojConv_535.const", "file_size": 2691072 }, "/transformer_blocks.21/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2051795968, "file_name": "cache/pos_embedprojConv_536.const", "file_size": 3072 }, "/transformer_blocks.21/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2051799040, "file_name": "cache/pos_embedprojConv_537.const", "file_size": 3072 }, "/transformer_blocks.21/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2051802112, "file_name": "cache/pos_embedprojConv_538.const", "file_size": 3072 }, "/transformer_blocks.21/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2051805184, "file_name": "cache/pos_embedprojConv_539.const", "file_size": 3072 }, "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_0_existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2051808256, "file_name": "cache/pos_embedprojConv_540.const", "file_size": 5382144 }, "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_0_existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2057190400, "file_name": "cache/pos_embedprojConv_541.const", "file_size": 5382144 }, "onnx::MatMul_8434_onnx::MatMul_8431": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2062572544, "file_name": "cache/pos_embedprojConv_542.const", "file_size": 5382144 }, "onnx::MatMul_8435_onnx::MatMul_8432": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2067954688, "file_name": "cache/pos_embedprojConv_543.const", "file_size": 5382144 }, "onnx::MatMul_8436_onnx::MatMul_8433": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2073336832, "file_name": "cache/pos_embedprojConv_544.const", "file_size": 5382144 }, "onnx::MatMul_8450": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2078718976, "file_name": "cache/pos_embedprojConv_545.const", "file_size": 2691072 }, "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2081410048, "file_name": "cache/pos_embedprojConv_546.const", "file_size": 2691072 }, "/transformer_blocks.21/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2084101120, "file_name": "cache/pos_embedprojConv_547.const", "file_size": 3072 }, "/transformer_blocks.21/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2084104192, "file_name": "cache/pos_embedprojConv_548.const", "file_size": 3072 }, "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_3_existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2084107264, "file_name": "cache/pos_embedprojConv_549.const", "file_size": 5382144 }, "onnx::MatMul_8452": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2089489408, "file_name": "cache/pos_embedprojConv_550.const", "file_size": 10764288 }, "onnx::MatMul_8453": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2100253696, "file_name": "cache/pos_embedprojConv_551.const", "file_size": 10764288 }, "existing_model.transformer_blocks.21.norm1.linear.weight_5_1_46_27_42_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2111017984, "file_name": "cache/pos_embedprojConv_552.const", "file_size": 2691072 }, "onnx::MatMul_8451": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2113709056, "file_name": "cache/pos_embedprojConv_553.const", "file_size": 2691072 }, "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2116400128, "file_name": "cache/pos_embedprojConv_554.const", "file_size": 2691072 }, "/transformer_blocks.21/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2119091200, "file_name": "cache/pos_embedprojConv_555.const", "file_size": 3072 }, "/transformer_blocks.21/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2119094272, "file_name": "cache/pos_embedprojConv_556.const", "file_size": 3072 }, "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_3_existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2119097344, "file_name": "cache/pos_embedprojConv_557.const", "file_size": 5382144 }, "onnx::MatMul_8454": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2124479488, "file_name": "cache/pos_embedprojConv_558.const", "file_size": 10764288 }, "onnx::MatMul_8455": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2135243776, "file_name": "cache/pos_embedprojConv_559.const", "file_size": 10764288 }, "existing_model.transformer_blocks.21.norm1_context.linear.weight_5_1_47_27_43_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2146008064, "file_name": "cache/pos_embedprojConv_560.const", "file_size": 2691072 }, "/transformer_blocks.22/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2148699136, "file_name": "cache/pos_embedprojConv_561.const", "file_size": 3072 }, "/transformer_blocks.22/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2148702208, "file_name": "cache/pos_embedprojConv_562.const", "file_size": 3072 }, "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_0_existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2148705280, "file_name": "cache/pos_embedprojConv_563.const", "file_size": 5382144 }, "/transformer_blocks.22/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2154087424, "file_name": "cache/pos_embedprojConv_564.const", "file_size": 3072 }, "/transformer_blocks.22/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2154090496, "file_name": "cache/pos_embedprojConv_565.const", "file_size": 3072 }, "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_0_existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2154093568, "file_name": "cache/pos_embedprojConv_566.const", "file_size": 5382144 }, "onnx::MatMul_8459_onnx::MatMul_8456": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2159475712, "file_name": "cache/pos_embedprojConv_567.const", "file_size": 5382144 }, "onnx::MatMul_8460_onnx::MatMul_8457": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2164857856, "file_name": "cache/pos_embedprojConv_568.const", "file_size": 5382144 }, "onnx::MatMul_8461_onnx::MatMul_8458": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2170240000, "file_name": "cache/pos_embedprojConv_569.const", "file_size": 5382144 }, "onnx::MatMul_8475": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2175622144, "file_name": "cache/pos_embedprojConv_570.const", "file_size": 2691072 }, "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2178313216, "file_name": "cache/pos_embedprojConv_571.const", "file_size": 2691072 }, "/transformer_blocks.22/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2181004288, "file_name": "cache/pos_embedprojConv_572.const", "file_size": 3072 }, "/transformer_blocks.22/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2181007360, "file_name": "cache/pos_embedprojConv_573.const", "file_size": 3072 }, "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_3_existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2181010432, "file_name": "cache/pos_embedprojConv_574.const", "file_size": 5382144 }, "onnx::MatMul_8477": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2186392576, "file_name": "cache/pos_embedprojConv_575.const", "file_size": 10764288 }, "onnx::MatMul_8478": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2197156864, "file_name": "cache/pos_embedprojConv_576.const", "file_size": 10764288 }, "existing_model.transformer_blocks.22.norm1.linear.weight_5_1_48_27_44_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2207921152, "file_name": "cache/pos_embedprojConv_577.const", "file_size": 2691072 }, "onnx::MatMul_8476": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2210612224, "file_name": "cache/pos_embedprojConv_578.const", "file_size": 2691072 }, "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2213303296, "file_name": "cache/pos_embedprojConv_579.const", "file_size": 2691072 }, "/transformer_blocks.22/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2215994368, "file_name": "cache/pos_embedprojConv_580.const", "file_size": 3072 }, "/transformer_blocks.22/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2215997440, "file_name": "cache/pos_embedprojConv_581.const", "file_size": 3072 }, "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_3_existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2216000512, "file_name": "cache/pos_embedprojConv_582.const", "file_size": 5382144 }, "onnx::MatMul_8479": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2221382656, "file_name": "cache/pos_embedprojConv_583.const", "file_size": 10764288 }, "onnx::MatMul_8480": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2232146944, "file_name": "cache/pos_embedprojConv_584.const", "file_size": 10764288 }, "existing_model.transformer_blocks.22.norm1_context.linear.weight_5_1_49_27_45_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2242911232, "file_name": "cache/pos_embedprojConv_585.const", "file_size": 2691072 }, "/transformer_blocks.23/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2245602304, "file_name": "cache/pos_embedprojConv_586.const", "file_size": 3072 }, "/transformer_blocks.23/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2245605376, "file_name": "cache/pos_embedprojConv_587.const", "file_size": 3072 }, "/transformer_blocks.23/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2245608448, "file_name": "cache/pos_embedprojConv_588.const", "file_size": 3072 }, "/transformer_blocks.23/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2245611520, "file_name": "cache/pos_embedprojConv_589.const", "file_size": 3072 }, "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_0_existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2245614592, "file_name": "cache/pos_embedprojConv_590.const", "file_size": 5382144 }, "existing_model.transformer_blocks.23.norm1_context.linear.weight_5_1_51_27_47_0_existing_model.transformer_blocks.23.norm1_context.linear.weight_5_1_51_27_47_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2250996736, "file_name": "cache/pos_embedprojConv_591.const", "file_size": 5382144 }, "onnx::MatMul_8484_onnx::MatMul_8481": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2256378880, "file_name": "cache/pos_embedprojConv_592.const", "file_size": 5382144 }, "onnx::MatMul_8485_onnx::MatMul_8482": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2261761024, "file_name": "cache/pos_embedprojConv_593.const", "file_size": 5382144 }, "onnx::MatMul_8486_onnx::MatMul_8483": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2267143168, "file_name": "cache/pos_embedprojConv_594.const", "file_size": 5382144 }, "onnx::MatMul_8497": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2272525312, "file_name": "cache/pos_embedprojConv_595.const", "file_size": 2691072 }, "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2275216384, "file_name": "cache/pos_embedprojConv_596.const", "file_size": 2691072 }, "/transformer_blocks.23/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2277907456, "file_name": "cache/pos_embedprojConv_597.const", "file_size": 3072 }, "/transformer_blocks.23/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2277910528, "file_name": "cache/pos_embedprojConv_598.const", "file_size": 3072 }, "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_3_existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2277913600, "file_name": "cache/pos_embedprojConv_599.const", "file_size": 5382144 }, "onnx::MatMul_8498": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2283295744, "file_name": "cache/pos_embedprojConv_600.const", "file_size": 10764288 }, "onnx::MatMul_8499": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 2294060032, "file_name": "cache/pos_embedprojConv_601.const", "file_size": 10764288 }, "existing_model.transformer_blocks.23.norm1.linear.weight_5_1_50_27_46_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 2304824320, "file_name": "cache/pos_embedprojConv_602.const", "file_size": 2691072 }, "/norm_out/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2307515392, "file_name": "cache/pos_embedprojConv_603.const", "file_size": 3072 }, "/norm_out/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 2307518464, "file_name": "cache/pos_embedprojConv_604.const", "file_size": 3072 }, "existing_model.norm_out.linear.weight_5_1_52_27_48_0_existing_model.norm_out.linear.weight_5_1_52_27_48_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 2307521536, "file_name": "cache/pos_embedprojConv_605.const", "file_size": 5382144 }, "/norm_out/Add_2_output_0.out0_0_95_bfp.out1_93_bfp.wts": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 64 ], "size_in_bytes": 128, "op_tensor_size": 128, "offset": 2312903680, "file_name": "cache/pos_embedprojConv_606.const", "file_size": 128 }, "onnx::MatMul_8500": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 112128 ], "size_in_bytes": 112128, "op_tensor_size": 112128, "offset": 2312903808, "file_name": "cache/pos_embedprojConv_607.const", "file_size": 112128 } }, "dynamic_shape_subgraph": true, "dynamic_shape_list": [ { "state_dim1": 1024, "floor(h/2)": 32, "batch_size": 2, "h": 64, "max_length + state_dim1": 1184, "max_length": 160, "w": 64, "floor(w/2)": 32 }, { "state_dim1": 1536, "floor(h/2)": 32, "batch_size": 2, "h": 64, "max_length + state_dim1": 1696, "max_length": 160, "w": 96, "floor(w/2)": 48 }, { "state_dim1": 1536, "floor(h/2)": 48, "batch_size": 2, "h": 96, "max_length + state_dim1": 1696, "max_length": 160, "w": 64, "floor(w/2)": 32 }, { "state_dim1": 2304, "floor(h/2)": 36, "batch_size": 2, "h": 72, "max_length + state_dim1": 2464, "max_length": 160, "w": 128, "floor(w/2)": 64 }, { "state_dim1": 2304, "floor(h/2)": 64, "batch_size": 2, "h": 128, "max_length + state_dim1": 2464, "max_length": 160, "w": 72, "floor(w/2)": 36 }, { "state_dim1": 3072, "floor(h/2)": 48, "batch_size": 2, "h": 96, "max_length + state_dim1": 3232, "max_length": 160, "w": 128, "floor(w/2)": 64 }, { "state_dim1": 3072, "floor(h/2)": 64, "batch_size": 2, "h": 128, "max_length + state_dim1": 3232, "max_length": 160, "w": 96, "floor(w/2)": 48 }, { "state_dim1": 4096, "floor(h/2)": 64, "batch_size": 2, "h": 128, "max_length + state_dim1": 4256, "max_length": 160, "w": 128, "floor(w/2)": 64 } ], "aux_info": {} }