{ "dd_meta_major_version": 1, "dd_meta_minor_version": 4, "state_table_updates": [], "op_list": [ { "name": "/pos_embed/proj/Conv", "type": "SDConv", "in_args": [ "hidden_states_nhwc.out5_0_0" ], "const_args": [ "pos_embed.proj.weight" ], "out_args": [ "/pos_embed/Transpose_output_0.out5_0_0" ], "attrs": { "auto_pad": { "type": "str", "value": [ "NOTSET" ] }, "dilations": { "type": "int", "value": [ "1", "1" ] }, "group": { "type": "int", "value": [ "1" ] }, "kernel_shape": { "type": "int", "value": [ "2", "2" ] }, "pads": { "type": "int", "value": [ "0", "0", "0", "0" ] }, "strides": { "type": "int", "value": [ "2", "2" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "w", "h", "16" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(w/2)", "floor(h/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "2", "2", "16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "float" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/pos_embed/Add_2", "type": "SDAdd", "in_args": [ "/pos_embed/Transpose_output_0.out5_0_0", "/pos_embed/Reshape_1_output_0.out_35_1_2" ], "const_args": [], "out_args": [ "/pos_embed/Add_2_output_0.out_35_1_2" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "b_shape": { "type": "str", "value": [ "1", "floor(h/2)*floor(w/2)", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/pos_embed_input/proj/Conv", "type": "SDConv", "in_args": [ "controlnet_cond_nhwc.out5_0_1" ], "const_args": [ "pos_embed_input.proj.weight" ], "out_args": [ "/pos_embed_input/Transpose_output_0.out5_0_1" ], "attrs": { "auto_pad": { "type": "str", "value": [ "NOTSET" ] }, "dilations": { "type": "int", "value": [ "1", "1" ] }, "group": { "type": "int", "value": [ "1" ] }, "kernel_shape": { "type": "int", "value": [ "2", "2" ] }, "pads": { "type": "int", "value": [ "0", "0", "0", "0" ] }, "strides": { "type": "int", "value": [ "2", "2" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "w", "h", "16" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(w/2)", "floor(h/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "2", "2", "16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "float" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add", "type": "SDAdd", "in_args": [ "/pos_embed/Add_2_output_0.out_35_1_2", "/pos_embed_input/Transpose_output_0.out5_0_1" ], "const_args": [], "out_args": [ "/Add_output_0.out_35_1_3" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/timestep_embedder/linear_1/Gemm", "type": "SDGemm", "in_args": [ "/time_text_embed/Cast_output_0.out17_3_3" ], "const_args": [ "time_text_embed.timestep_embedder.linear_1.weight_5_1_2" ], "out_args": [ "/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1", "256" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "256", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/timestep_embedder/act/Sigmoid", "type": "SDSilu", "in_args": [ "/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3" ], "const_args": [ "/time_text_embed/timestep_embedder/act/Sigmoid.weights2_1" ], "out_args": [ "/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "weight_shape": { "type": "int", "value": [ "128" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/timestep_embedder/linear_2/Gemm", "type": "SDGemm", "in_args": [ "/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1" ], "const_args": [ "time_text_embed.timestep_embedder.linear_2.weight_5_1_3" ], "out_args": [ "/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/text_embedder/linear_1/Gemm", "type": "SDGemm", "in_args": [ "pooled_projections.out17_3_1" ], "const_args": [ "time_text_embed.text_embedder.linear_1.weight_5_1_0" ], "out_args": [ "/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1", "2048" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "2048", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/text_embedder/act_1/Sigmoid", "type": "SDSilu", "in_args": [ "/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1" ], "const_args": [ "/time_text_embed/text_embedder/act_1/Sigmoid.weights2_0" ], "out_args": [ "/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "weight_shape": { "type": "int", "value": [ "128" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/text_embedder/linear_2/Gemm", "type": "SDGemm", "in_args": [ "/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0" ], "const_args": [ "time_text_embed.text_embedder.linear_2.weight_5_1_1" ], "out_args": [ "/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/Add", "type": "SDAdd", "in_args": [ "/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4", "/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2" ], "const_args": [], "out_args": [ "/time_text_embed/Add_output_0.out_35_1_4" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1/silu/Sigmoid", "type": "SDSilu", "in_args": [ "/time_text_embed/Add_output_0.out_35_1_4" ], "const_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid.weights2_2" ], "out_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "weight_shape": { "type": "int", "value": [ "128" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "encoder_hidden_states.out17_3_0_SDCastBf2Bfp", "type": "SDCastBf2Bfp", "in_args": [ "encoder_hidden_states.out17_3_0" ], "const_args": [ "encoder_hidden_states.out17_3_0_bfp.wts" ], "out_args": [ "encoder_hidden_states.out17_3_0_bfp.out25_0" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "4096" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "4096" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/context_embedder/MatMul", "type": "SDGemm_bfp", "in_args": [ "encoder_hidden_states.out17_3_0_bfp.out25_0" ], "const_args": [ "onnx::MatMul_3779" ], "out_args": [ "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "4096" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "4096", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_SDCastBfp2Bf", "type": "SDCastBfp2Bf", "in_args": [ "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0" ], "const_args": [ "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_bfp.wts" ], "out_args": [ "/context_embedder/Add_output_0.out17_3_0" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/context_embedder/Add_output_0.out17_3_0" ], "const_args": [ "/transformer_blocks.0/norm1_context/norm/Constant_output_0", "/transformer_blocks.0/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0" ], "const_args": [ "transformer_blocks.0.norm1_context.linear.weight_5_1_4_27_0_1_transformer_blocks.0.norm1_context.linear.weight_5_1_4_27_0_0" ], "out_args": [ "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_output_0.out_35_1_3" ], "const_args": [ "/transformer_blocks.0/norm1/norm/Constant_output_0", "/transformer_blocks.0/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1" ], "const_args": [ "transformer_blocks.0.norm1.linear.weight_5_1_5_27_1_1_transformer_blocks.0.norm1.linear.weight_5_1_5_27_1_0" ], "out_args": [ "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_45" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0", "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_45" ], "const_args": [ "onnx::MatMul_3783_onnx::MatMul_3780" ], "out_args": [ "/transformer_blocks.0/attn/Concat_output_0.out22_0_bfp.out23_0" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0", "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_45" ], "const_args": [ "onnx::MatMul_3784_onnx::MatMul_3781" ], "out_args": [ "/transformer_blocks.0/attn/Concat_1_output_0.out22_0_bfp.out23_1" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0", "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_45" ], "const_args": [ "onnx::MatMul_3785_onnx::MatMul_3782" ], "out_args": [ "/transformer_blocks.0/attn/Concat_2_output_0.out22_0_bfp.out23_2" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_0", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.0/attn/Concat_output_0.out22_0_bfp.out23_0", "/transformer_blocks.0/attn/Concat_1_output_0.out22_0_bfp.out23_1", "/transformer_blocks.0/attn/Concat_2_output_0.out22_0_bfp.out23_2" ], "const_args": [], "out_args": [ "/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "64", "max_length + floor(h/2)*floor(w/2)" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0" ], "const_args": [ "onnx::MatMul_3800" ], "out_args": [ "/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1", "/context_embedder/Add_output_0.out17_3_0" ], "const_args": [ "transformer_blocks.0.norm1_context.linear.weight_5_1_4_27_0_2_gma" ], "out_args": [ "/transformer_blocks.0/Add_4_output_0.out10_0" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.0/Add_4_output_0.out10_0" ], "const_args": [ "/transformer_blocks.0/norm2_context/Constant_output_0", "/transformer_blocks.0/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3" ], "const_args": [ "transformer_blocks.0.norm1_context.linear.weight_5_1_4_27_0_3_transformer_blocks.0.norm1_context.linear.weight_5_1_4_27_0_4" ], "out_args": [ "/transformer_blocks.0/Add_6_output_0.out0_0_3_bfp.out1_1" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.0/Add_6_output_0.out0_0_3_bfp.out1_1" ], "const_args": [ "onnx::MatMul_3803" ], "out_args": [ "/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_6" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_6" ], "const_args": [ "onnx::MatMul_3804" ], "out_args": [ "/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_12_bfp.out25_8" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_12_bfp.out25_8", "/transformer_blocks.0/Add_4_output_0.out10_0" ], "const_args": [ "transformer_blocks.0.norm1_context.linear.weight_5_1_4_27_0_5_gma" ], "out_args": [ "/transformer_blocks.0/Add_7_output_0.out10_1" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0" ], "const_args": [ "onnx::MatMul_3799" ], "out_args": [ "/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0", "/Add_output_0.out_35_1_3" ], "const_args": [ "transformer_blocks.0.norm1.linear.weight_5_1_5_27_1_2_gma" ], "out_args": [ "/transformer_blocks.0/Add_output_0.out10_44" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.0/Add_output_0.out10_44" ], "const_args": [ "/transformer_blocks.0/norm2/Constant_output_0", "/transformer_blocks.0/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2" ], "const_args": [ "transformer_blocks.0.norm1.linear.weight_5_1_5_27_1_3_transformer_blocks.0.norm1.linear.weight_5_1_5_27_1_4" ], "out_args": [ "/transformer_blocks.0/Add_2_output_0.out0_0_2_bfp.out1_46" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.0/Add_2_output_0.out0_0_2_bfp.out1_46" ], "const_args": [ "onnx::MatMul_3801" ], "out_args": [ "/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_5" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_5" ], "const_args": [ "onnx::MatMul_3802" ], "out_args": [ "/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_11_bfp.out25_7" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_11_bfp.out25_7", "/transformer_blocks.0/Add_output_0.out10_44" ], "const_args": [ "transformer_blocks.0.norm1.linear.weight_5_1_5_27_1_5_gma" ], "out_args": [ "/transformer_blocks.0/Add_3_output_0.out10_45" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.0/Add_3_output_0.out10_45" ], "const_args": [ "/transformer_blocks.1/norm1/norm/Constant_output_0", "/transformer_blocks.1/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_4_bfp.out15_4" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_4_bfp.out15_4" ], "const_args": [ "transformer_blocks.1.norm1.linear.weight_5_1_7_27_3_0_transformer_blocks.1.norm1.linear.weight_5_1_7_27_3_1" ], "out_args": [ "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/controlnet_blocks.0/MatMul", "type": "SDGemm", "in_args": [ "/transformer_blocks.0/Add_3_output_0.out10_45" ], "const_args": [ "onnx::MatMul_4074" ], "out_args": [ "/controlnet_blocks.0/Add_output_0.out17_3_13" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.0/Add_7_output_0.out10_1" ], "const_args": [ "/transformer_blocks.1/norm1_context/norm/Constant_output_0", "/transformer_blocks.1/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_5_bfp.out15_5" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_5_bfp.out15_5" ], "const_args": [ "transformer_blocks.1.norm1_context.linear.weight_5_1_6_27_2_0_transformer_blocks.1.norm1_context.linear.weight_5_1_6_27_2_1" ], "out_args": [ "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4", "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2" ], "const_args": [ "onnx::MatMul_3808_onnx::MatMul_3805" ], "out_args": [ "/transformer_blocks.1/attn/Concat_output_0.out22_1_bfp.out23_3" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4", "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2" ], "const_args": [ "onnx::MatMul_3809_onnx::MatMul_3806" ], "out_args": [ "/transformer_blocks.1/attn/Concat_1_output_0.out22_1_bfp.out23_4" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4", "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2" ], "const_args": [ "onnx::MatMul_3810_onnx::MatMul_3807" ], "out_args": [ "/transformer_blocks.1/attn/Concat_2_output_0.out22_1_bfp.out23_5" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_1", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.1/attn/Concat_output_0.out22_1_bfp.out23_3", "/transformer_blocks.1/attn/Concat_1_output_0.out22_1_bfp.out23_4", "/transformer_blocks.1/attn/Concat_2_output_0.out22_1_bfp.out23_5" ], "const_args": [], "out_args": [ "/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "64", "max_length + floor(h/2)*floor(w/2)" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1" ], "const_args": [ "onnx::MatMul_3824" ], "out_args": [ "/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2", "/transformer_blocks.0/Add_3_output_0.out10_45" ], "const_args": [ "transformer_blocks.1.norm1.linear.weight_5_1_7_27_3_2_gma" ], "out_args": [ "/transformer_blocks.1/Add_output_0.out10_2" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.1/Add_output_0.out10_2" ], "const_args": [ "/transformer_blocks.1/norm2/Constant_output_0", "/transformer_blocks.1/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6" ], "const_args": [ "transformer_blocks.1.norm1.linear.weight_5_1_7_27_3_3_transformer_blocks.1.norm1.linear.weight_5_1_7_27_3_4" ], "out_args": [ "/transformer_blocks.1/Add_2_output_0.out0_0_6_bfp.out1_3" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.1/Add_2_output_0.out0_0_6_bfp.out1_3" ], "const_args": [ "onnx::MatMul_3826" ], "out_args": [ "/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_10" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_10" ], "const_args": [ "onnx::MatMul_3827" ], "out_args": [ "/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_20_bfp.out25_12" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_20_bfp.out25_12", "/transformer_blocks.1/Add_output_0.out10_2" ], "const_args": [ "transformer_blocks.1.norm1.linear.weight_5_1_7_27_3_5_gma" ], "out_args": [ "/transformer_blocks.1/Add_3_output_0.out10_3" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/controlnet_blocks.1/MatMul", "type": "SDGemm", "in_args": [ "/transformer_blocks.1/Add_3_output_0.out10_3" ], "const_args": [ "onnx::MatMul_4075" ], "out_args": [ "/controlnet_blocks.1/Add_output_0.out17_3_22" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1" ], "const_args": [ "onnx::MatMul_3825" ], "out_args": [ "/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3", "/transformer_blocks.0/Add_7_output_0.out10_1" ], "const_args": [ "transformer_blocks.1.norm1_context.linear.weight_5_1_6_27_2_2_gma" ], "out_args": [ "/transformer_blocks.1/Add_4_output_0.out10_4" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.1/Add_4_output_0.out10_4" ], "const_args": [ "/transformer_blocks.1/norm2_context/Constant_output_0", "/transformer_blocks.1/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7" ], "const_args": [ "transformer_blocks.1.norm1_context.linear.weight_5_1_6_27_2_3_transformer_blocks.1.norm1_context.linear.weight_5_1_6_27_2_4" ], "out_args": [ "/transformer_blocks.1/Add_6_output_0.out0_0_7_bfp.out1_5" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.1/Add_6_output_0.out0_0_7_bfp.out1_5" ], "const_args": [ "onnx::MatMul_3828" ], "out_args": [ "/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_11" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_11" ], "const_args": [ "onnx::MatMul_3829" ], "out_args": [ "/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_21_bfp.out25_13" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_21_bfp.out25_13", "/transformer_blocks.1/Add_4_output_0.out10_4" ], "const_args": [ "transformer_blocks.1.norm1_context.linear.weight_5_1_6_27_2_5_gma" ], "out_args": [ "/transformer_blocks.1/Add_7_output_0.out10_5" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.1/Add_3_output_0.out10_3" ], "const_args": [ "/transformer_blocks.2/norm1/norm/Constant_output_0", "/transformer_blocks.2/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_8_bfp.out15_8" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.1/Add_7_output_0.out10_5" ], "const_args": [ "/transformer_blocks.2/norm1_context/norm/Constant_output_0", "/transformer_blocks.2/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_9_bfp.out15_9" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_8_bfp.out15_8" ], "const_args": [ "transformer_blocks.2.norm1.linear.weight_5_1_9_27_5_0_transformer_blocks.2.norm1.linear.weight_5_1_9_27_5_1" ], "out_args": [ "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_13" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_9_bfp.out15_9" ], "const_args": [ "transformer_blocks.2.norm1_context.linear.weight_5_1_8_27_4_0_transformer_blocks.2.norm1_context.linear.weight_5_1_8_27_4_1" ], "out_args": [ "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_15" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_15", "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_13" ], "const_args": [ "onnx::MatMul_3833_onnx::MatMul_3830" ], "out_args": [ "/transformer_blocks.2/attn/Concat_output_0.out22_2_bfp.out23_6" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_15", "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_13" ], "const_args": [ "onnx::MatMul_3834_onnx::MatMul_3831" ], "out_args": [ "/transformer_blocks.2/attn/Concat_1_output_0.out22_2_bfp.out23_7" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_15", "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_13" ], "const_args": [ "onnx::MatMul_3835_onnx::MatMul_3832" ], "out_args": [ "/transformer_blocks.2/attn/Concat_2_output_0.out22_2_bfp.out23_8" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_2", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.2/attn/Concat_output_0.out22_2_bfp.out23_6", "/transformer_blocks.2/attn/Concat_1_output_0.out22_2_bfp.out23_7", "/transformer_blocks.2/attn/Concat_2_output_0.out22_2_bfp.out23_8" ], "const_args": [], "out_args": [ "/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "64", "max_length + floor(h/2)*floor(w/2)" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2" ], "const_args": [ "onnx::MatMul_3850" ], "out_args": [ "/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5", "/transformer_blocks.1/Add_7_output_0.out10_5" ], "const_args": [ "transformer_blocks.2.norm1_context.linear.weight_5_1_8_27_4_2_gma" ], "out_args": [ "/transformer_blocks.2/Add_4_output_0.out10_14" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2" ], "const_args": [ "onnx::MatMul_3849" ], "out_args": [ "/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4", "/transformer_blocks.1/Add_3_output_0.out10_3" ], "const_args": [ "transformer_blocks.2.norm1.linear.weight_5_1_9_27_5_2_gma" ], "out_args": [ "/transformer_blocks.2/Add_output_0.out10_12" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.2/Add_output_0.out10_12" ], "const_args": [ "/transformer_blocks.2/norm2/Constant_output_0", "/transformer_blocks.2/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10" ], "const_args": [ "transformer_blocks.2.norm1.linear.weight_5_1_9_27_5_3_transformer_blocks.2.norm1.linear.weight_5_1_9_27_5_4" ], "out_args": [ "/transformer_blocks.2/Add_2_output_0.out0_0_10_bfp.out1_14" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.2/Add_2_output_0.out0_0_10_bfp.out1_14" ], "const_args": [ "onnx::MatMul_3851" ], "out_args": [ "/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_15" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_15" ], "const_args": [ "onnx::MatMul_3852" ], "out_args": [ "/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_29_bfp.out25_17" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_29_bfp.out25_17", "/transformer_blocks.2/Add_output_0.out10_12" ], "const_args": [ "transformer_blocks.2.norm1.linear.weight_5_1_9_27_5_5_gma" ], "out_args": [ "/transformer_blocks.2/Add_3_output_0.out10_13" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/controlnet_blocks.2/MatMul", "type": "SDGemm", "in_args": [ "/transformer_blocks.2/Add_3_output_0.out10_13" ], "const_args": [ "onnx::MatMul_4076" ], "out_args": [ "/controlnet_blocks.2/Add_output_0.out17_3_31" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.2/Add_4_output_0.out10_14" ], "const_args": [ "/transformer_blocks.2/norm2_context/Constant_output_0", "/transformer_blocks.2/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11" ], "const_args": [ "transformer_blocks.2.norm1_context.linear.weight_5_1_8_27_4_3_transformer_blocks.2.norm1_context.linear.weight_5_1_8_27_4_4" ], "out_args": [ "/transformer_blocks.2/Add_6_output_0.out0_0_11_bfp.out1_16" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.2/Add_6_output_0.out0_0_11_bfp.out1_16" ], "const_args": [ "onnx::MatMul_3853" ], "out_args": [ "/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_16" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_16" ], "const_args": [ "onnx::MatMul_3854" ], "out_args": [ "/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_30_bfp.out25_18" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_30_bfp.out25_18", "/transformer_blocks.2/Add_4_output_0.out10_14" ], "const_args": [ "transformer_blocks.2.norm1_context.linear.weight_5_1_8_27_4_5_gma" ], "out_args": [ "/transformer_blocks.2/Add_7_output_0.out10_15" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.2/Add_3_output_0.out10_13" ], "const_args": [ "/transformer_blocks.3/norm1/norm/Constant_output_0", "/transformer_blocks.3/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_12_bfp.out15_12" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.2/Add_7_output_0.out10_15" ], "const_args": [ "/transformer_blocks.3/norm1_context/norm/Constant_output_0", "/transformer_blocks.3/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_13_bfp.out15_13" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_12_bfp.out15_12" ], "const_args": [ "transformer_blocks.3.norm1.linear.weight_5_1_11_27_7_0_transformer_blocks.3.norm1.linear.weight_5_1_11_27_7_1" ], "out_args": [ "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_17" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_13_bfp.out15_13" ], "const_args": [ "transformer_blocks.3.norm1_context.linear.weight_5_1_10_27_6_0_transformer_blocks.3.norm1_context.linear.weight_5_1_10_27_6_1" ], "out_args": [ "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_19" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_19", "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_17" ], "const_args": [ "onnx::MatMul_3858_onnx::MatMul_3855" ], "out_args": [ "/transformer_blocks.3/attn/Concat_output_0.out22_3_bfp.out23_9" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_19", "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_17" ], "const_args": [ "onnx::MatMul_3859_onnx::MatMul_3856" ], "out_args": [ "/transformer_blocks.3/attn/Concat_1_output_0.out22_3_bfp.out23_10" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_19", "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_17" ], "const_args": [ "onnx::MatMul_3860_onnx::MatMul_3857" ], "out_args": [ "/transformer_blocks.3/attn/Concat_2_output_0.out22_3_bfp.out23_11" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_3", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.3/attn/Concat_output_0.out22_3_bfp.out23_9", "/transformer_blocks.3/attn/Concat_1_output_0.out22_3_bfp.out23_10", "/transformer_blocks.3/attn/Concat_2_output_0.out22_3_bfp.out23_11" ], "const_args": [], "out_args": [ "/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "64", "max_length + floor(h/2)*floor(w/2)" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3" ], "const_args": [ "onnx::MatMul_3875" ], "out_args": [ "/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7", "/transformer_blocks.2/Add_7_output_0.out10_15" ], "const_args": [ "transformer_blocks.3.norm1_context.linear.weight_5_1_10_27_6_2_gma" ], "out_args": [ "/transformer_blocks.3/Add_4_output_0.out10_18" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3" ], "const_args": [ "onnx::MatMul_3874" ], "out_args": [ "/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6", "/transformer_blocks.2/Add_3_output_0.out10_13" ], "const_args": [ "transformer_blocks.3.norm1.linear.weight_5_1_11_27_7_2_gma" ], "out_args": [ "/transformer_blocks.3/Add_output_0.out10_16" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.3/Add_output_0.out10_16" ], "const_args": [ "/transformer_blocks.3/norm2/Constant_output_0", "/transformer_blocks.3/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14" ], "const_args": [ "transformer_blocks.3.norm1.linear.weight_5_1_11_27_7_3_transformer_blocks.3.norm1.linear.weight_5_1_11_27_7_4" ], "out_args": [ "/transformer_blocks.3/Add_2_output_0.out0_0_14_bfp.out1_18" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.3/Add_2_output_0.out0_0_14_bfp.out1_18" ], "const_args": [ "onnx::MatMul_3876" ], "out_args": [ "/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_20" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_20" ], "const_args": [ "onnx::MatMul_3877" ], "out_args": [ "/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_38_bfp.out25_22" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_38_bfp.out25_22", "/transformer_blocks.3/Add_output_0.out10_16" ], "const_args": [ "transformer_blocks.3.norm1.linear.weight_5_1_11_27_7_5_gma" ], "out_args": [ "/transformer_blocks.3/Add_3_output_0.out10_17" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/controlnet_blocks.3/MatMul", "type": "SDGemm", "in_args": [ "/transformer_blocks.3/Add_3_output_0.out10_17" ], "const_args": [ "onnx::MatMul_4077" ], "out_args": [ "/controlnet_blocks.3/Add_output_0.out17_3_40" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.3/Add_4_output_0.out10_18" ], "const_args": [ "/transformer_blocks.3/norm2_context/Constant_output_0", "/transformer_blocks.3/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15" ], "const_args": [ "transformer_blocks.3.norm1_context.linear.weight_5_1_10_27_6_3_transformer_blocks.3.norm1_context.linear.weight_5_1_10_27_6_4" ], "out_args": [ "/transformer_blocks.3/Add_6_output_0.out0_0_15_bfp.out1_20" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.3/Add_6_output_0.out0_0_15_bfp.out1_20" ], "const_args": [ "onnx::MatMul_3878" ], "out_args": [ "/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_21" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_21" ], "const_args": [ "onnx::MatMul_3879" ], "out_args": [ "/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_39_bfp.out25_23" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_39_bfp.out25_23", "/transformer_blocks.3/Add_4_output_0.out10_18" ], "const_args": [ "transformer_blocks.3.norm1_context.linear.weight_5_1_10_27_6_5_gma" ], "out_args": [ "/transformer_blocks.3/Add_7_output_0.out10_19" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.3/Add_3_output_0.out10_17" ], "const_args": [ "/transformer_blocks.4/norm1/norm/Constant_output_0", "/transformer_blocks.4/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_16_bfp.out15_16" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.3/Add_7_output_0.out10_19" ], "const_args": [ "/transformer_blocks.4/norm1_context/norm/Constant_output_0", "/transformer_blocks.4/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_17_bfp.out15_17" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_16_bfp.out15_16" ], "const_args": [ "transformer_blocks.4.norm1.linear.weight_5_1_13_27_9_0_transformer_blocks.4.norm1.linear.weight_5_1_13_27_9_1" ], "out_args": [ "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_21" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_17_bfp.out15_17" ], "const_args": [ "transformer_blocks.4.norm1_context.linear.weight_5_1_12_27_8_0_transformer_blocks.4.norm1_context.linear.weight_5_1_12_27_8_1" ], "out_args": [ "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_23" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_23", "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_21" ], "const_args": [ "onnx::MatMul_3883_onnx::MatMul_3880" ], "out_args": [ "/transformer_blocks.4/attn/Concat_output_0.out22_4_bfp.out23_12" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_23", "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_21" ], "const_args": [ "onnx::MatMul_3884_onnx::MatMul_3881" ], "out_args": [ "/transformer_blocks.4/attn/Concat_1_output_0.out22_4_bfp.out23_13" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_23", "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_21" ], "const_args": [ "onnx::MatMul_3885_onnx::MatMul_3882" ], "out_args": [ "/transformer_blocks.4/attn/Concat_2_output_0.out22_4_bfp.out23_14" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_4", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.4/attn/Concat_output_0.out22_4_bfp.out23_12", "/transformer_blocks.4/attn/Concat_1_output_0.out22_4_bfp.out23_13", "/transformer_blocks.4/attn/Concat_2_output_0.out22_4_bfp.out23_14" ], "const_args": [], "out_args": [ "/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "64", "max_length + floor(h/2)*floor(w/2)" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4" ], "const_args": [ "onnx::MatMul_3900" ], "out_args": [ "/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9", "/transformer_blocks.3/Add_7_output_0.out10_19" ], "const_args": [ "transformer_blocks.4.norm1_context.linear.weight_5_1_12_27_8_2_gma" ], "out_args": [ "/transformer_blocks.4/Add_4_output_0.out10_22" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4" ], "const_args": [ "onnx::MatMul_3899" ], "out_args": [ "/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8", "/transformer_blocks.3/Add_3_output_0.out10_17" ], "const_args": [ "transformer_blocks.4.norm1.linear.weight_5_1_13_27_9_2_gma" ], "out_args": [ "/transformer_blocks.4/Add_output_0.out10_20" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.4/Add_output_0.out10_20" ], "const_args": [ "/transformer_blocks.4/norm2/Constant_output_0", "/transformer_blocks.4/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18" ], "const_args": [ "transformer_blocks.4.norm1.linear.weight_5_1_13_27_9_3_transformer_blocks.4.norm1.linear.weight_5_1_13_27_9_4" ], "out_args": [ "/transformer_blocks.4/Add_2_output_0.out0_0_18_bfp.out1_22" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.4/Add_2_output_0.out0_0_18_bfp.out1_22" ], "const_args": [ "onnx::MatMul_3901" ], "out_args": [ "/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_25" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_25" ], "const_args": [ "onnx::MatMul_3902" ], "out_args": [ "/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_47_bfp.out25_27" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_47_bfp.out25_27", "/transformer_blocks.4/Add_output_0.out10_20" ], "const_args": [ "transformer_blocks.4.norm1.linear.weight_5_1_13_27_9_5_gma" ], "out_args": [ "/transformer_blocks.4/Add_3_output_0.out10_21" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/controlnet_blocks.4/MatMul", "type": "SDGemm", "in_args": [ "/transformer_blocks.4/Add_3_output_0.out10_21" ], "const_args": [ "onnx::MatMul_4078" ], "out_args": [ "/controlnet_blocks.4/Add_output_0.out17_3_49" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.4/Add_4_output_0.out10_22" ], "const_args": [ "/transformer_blocks.4/norm2_context/Constant_output_0", "/transformer_blocks.4/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19" ], "const_args": [ "transformer_blocks.4.norm1_context.linear.weight_5_1_12_27_8_3_transformer_blocks.4.norm1_context.linear.weight_5_1_12_27_8_4" ], "out_args": [ "/transformer_blocks.4/Add_6_output_0.out0_0_19_bfp.out1_24" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.4/Add_6_output_0.out0_0_19_bfp.out1_24" ], "const_args": [ "onnx::MatMul_3903" ], "out_args": [ "/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_26" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_26" ], "const_args": [ "onnx::MatMul_3904" ], "out_args": [ "/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_48_bfp.out25_28" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_48_bfp.out25_28", "/transformer_blocks.4/Add_4_output_0.out10_22" ], "const_args": [ "transformer_blocks.4.norm1_context.linear.weight_5_1_12_27_8_5_gma" ], "out_args": [ "/transformer_blocks.4/Add_7_output_0.out10_23" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.4/Add_3_output_0.out10_21" ], "const_args": [ "/transformer_blocks.5/norm1/norm/Constant_output_0", "/transformer_blocks.5/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_20_bfp.out15_20" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.4/Add_7_output_0.out10_23" ], "const_args": [ "/transformer_blocks.5/norm1_context/norm/Constant_output_0", "/transformer_blocks.5/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_21_bfp.out15_21" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_20_bfp.out15_20" ], "const_args": [ "transformer_blocks.5.norm1.linear.weight_5_1_15_27_11_0_transformer_blocks.5.norm1.linear.weight_5_1_15_27_11_1" ], "out_args": [ "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_25" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_21_bfp.out15_21" ], "const_args": [ "transformer_blocks.5.norm1_context.linear.weight_5_1_14_27_10_0_transformer_blocks.5.norm1_context.linear.weight_5_1_14_27_10_1" ], "out_args": [ "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_27" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_27", "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_25" ], "const_args": [ "onnx::MatMul_3908_onnx::MatMul_3905" ], "out_args": [ "/transformer_blocks.5/attn/Concat_output_0.out22_5_bfp.out23_15" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_27", "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_25" ], "const_args": [ "onnx::MatMul_3909_onnx::MatMul_3906" ], "out_args": [ "/transformer_blocks.5/attn/Concat_1_output_0.out22_5_bfp.out23_16" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_27", "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_25" ], "const_args": [ "onnx::MatMul_3910_onnx::MatMul_3907" ], "out_args": [ "/transformer_blocks.5/attn/Concat_2_output_0.out22_5_bfp.out23_17" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_5", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.5/attn/Concat_output_0.out22_5_bfp.out23_15", "/transformer_blocks.5/attn/Concat_1_output_0.out22_5_bfp.out23_16", "/transformer_blocks.5/attn/Concat_2_output_0.out22_5_bfp.out23_17" ], "const_args": [], "out_args": [ "/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "64", "max_length + floor(h/2)*floor(w/2)" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5" ], "const_args": [ "onnx::MatMul_3925" ], "out_args": [ "/transformer_blocks.5/attn/to_add_out/Add_output_0.out6_1_11_bfp.out7_11" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/attn/to_add_out/Add_output_0.out6_1_11_bfp.out7_11", "/transformer_blocks.4/Add_7_output_0.out10_23" ], "const_args": [ "transformer_blocks.5.norm1_context.linear.weight_5_1_14_27_10_2_gma" ], "out_args": [ "/transformer_blocks.5/Add_4_output_0.out10_26" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5" ], "const_args": [ "onnx::MatMul_3924" ], "out_args": [ "/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10", "/transformer_blocks.4/Add_3_output_0.out10_21" ], "const_args": [ "transformer_blocks.5.norm1.linear.weight_5_1_15_27_11_2_gma" ], "out_args": [ "/transformer_blocks.5/Add_output_0.out10_24" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.5/Add_output_0.out10_24" ], "const_args": [ "/transformer_blocks.5/norm2/Constant_output_0", "/transformer_blocks.5/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22" ], "const_args": [ "transformer_blocks.5.norm1.linear.weight_5_1_15_27_11_3_transformer_blocks.5.norm1.linear.weight_5_1_15_27_11_4" ], "out_args": [ "/transformer_blocks.5/Add_2_output_0.out0_0_22_bfp.out1_26" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.5/Add_2_output_0.out0_0_22_bfp.out1_26" ], "const_args": [ "onnx::MatMul_3926" ], "out_args": [ "/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_30" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_30" ], "const_args": [ "onnx::MatMul_3927" ], "out_args": [ "/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_56_bfp.out25_32" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_56_bfp.out25_32", "/transformer_blocks.5/Add_output_0.out10_24" ], "const_args": [ "transformer_blocks.5.norm1.linear.weight_5_1_15_27_11_5_gma" ], "out_args": [ "/transformer_blocks.5/Add_3_output_0.out10_25" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/controlnet_blocks.5/MatMul", "type": "SDGemm", "in_args": [ "/transformer_blocks.5/Add_3_output_0.out10_25" ], "const_args": [ "onnx::MatMul_4079" ], "out_args": [ "/controlnet_blocks.5/Add_output_0.out17_3_58" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.5/Add_4_output_0.out10_26" ], "const_args": [ "/transformer_blocks.5/norm2_context/Constant_output_0", "/transformer_blocks.5/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.5/norm2_context/LayerNormalization_output_0.out14_23_bfp.out15_23" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/norm2_context/LayerNormalization_output_0.out14_23_bfp.out15_23" ], "const_args": [ "transformer_blocks.5.norm1_context.linear.weight_5_1_14_27_10_3_transformer_blocks.5.norm1_context.linear.weight_5_1_14_27_10_4" ], "out_args": [ "/transformer_blocks.5/Add_6_output_0.out0_0_23_bfp.out1_28" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.5/Add_6_output_0.out0_0_23_bfp.out1_28" ], "const_args": [ "onnx::MatMul_3928" ], "out_args": [ "/transformer_blocks.5/ff_context/net.0/Mul_5_output_0.out17_2_11_bfp.out25_31" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.5/ff_context/net.0/Mul_5_output_0.out17_2_11_bfp.out25_31" ], "const_args": [ "onnx::MatMul_3929" ], "out_args": [ "/transformer_blocks.5/ff_context/net.2/Add_output_0.out17_3_57_bfp.out25_33" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/ff_context/net.2/Add_output_0.out17_3_57_bfp.out25_33", "/transformer_blocks.5/Add_4_output_0.out10_26" ], "const_args": [ "transformer_blocks.5.norm1_context.linear.weight_5_1_14_27_10_5_gma" ], "out_args": [ "/transformer_blocks.5/Add_7_output_0.out10_27" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.5/Add_3_output_0.out10_25" ], "const_args": [ "/transformer_blocks.6/norm1/norm/Constant_output_0", "/transformer_blocks.6/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.6/norm1/norm/LayerNormalization_output_0.out14_24_bfp.out15_24" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.5/Add_7_output_0.out10_27" ], "const_args": [ "/transformer_blocks.6/norm1_context/norm/Constant_output_0", "/transformer_blocks.6/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.6/norm1_context/norm/LayerNormalization_output_0.out14_25_bfp.out15_25" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/norm1/norm/LayerNormalization_output_0.out14_24_bfp.out15_24" ], "const_args": [ "transformer_blocks.6.norm1.linear.weight_5_1_17_27_13_0_transformer_blocks.6.norm1.linear.weight_5_1_17_27_13_1" ], "out_args": [ "/transformer_blocks.6/norm1/Add_2_output_0.out0_0_24_bfp.out1_29" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/norm1_context/norm/LayerNormalization_output_0.out14_25_bfp.out15_25" ], "const_args": [ "transformer_blocks.6.norm1_context.linear.weight_5_1_16_27_12_0_transformer_blocks.6.norm1_context.linear.weight_5_1_16_27_12_1" ], "out_args": [ "/transformer_blocks.6/norm1_context/Add_2_output_0.out0_0_25_bfp.out1_31" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.6/norm1_context/Add_2_output_0.out0_0_25_bfp.out1_31", "/transformer_blocks.6/norm1/Add_2_output_0.out0_0_24_bfp.out1_29" ], "const_args": [ "onnx::MatMul_3933_onnx::MatMul_3930" ], "out_args": [ "/transformer_blocks.6/attn/Concat_output_0.out22_6_bfp.out23_18" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.6/norm1_context/Add_2_output_0.out0_0_25_bfp.out1_31", "/transformer_blocks.6/norm1/Add_2_output_0.out0_0_24_bfp.out1_29" ], "const_args": [ "onnx::MatMul_3934_onnx::MatMul_3931" ], "out_args": [ "/transformer_blocks.6/attn/Concat_1_output_0.out22_6_bfp.out23_19" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.6/norm1_context/Add_2_output_0.out0_0_25_bfp.out1_31", "/transformer_blocks.6/norm1/Add_2_output_0.out0_0_24_bfp.out1_29" ], "const_args": [ "onnx::MatMul_3935_onnx::MatMul_3932" ], "out_args": [ "/transformer_blocks.6/attn/Concat_2_output_0.out22_6_bfp.out23_20" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_6", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.6/attn/Concat_output_0.out22_6_bfp.out23_18", "/transformer_blocks.6/attn/Concat_1_output_0.out22_6_bfp.out23_19", "/transformer_blocks.6/attn/Concat_2_output_0.out22_6_bfp.out23_20" ], "const_args": [], "out_args": [ "/transformer_blocks.6/attn/Reshape_3_output_0.out22_6_bfp.out27_0_6" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "64", "max_length + floor(h/2)*floor(w/2)" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.6/attn/Reshape_3_output_0.out22_6_bfp.out27_0_6" ], "const_args": [ "onnx::MatMul_3950" ], "out_args": [ "/transformer_blocks.6/attn/to_add_out/Add_output_0.out6_1_13_bfp.out7_13" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/attn/to_add_out/Add_output_0.out6_1_13_bfp.out7_13", "/transformer_blocks.5/Add_7_output_0.out10_27" ], "const_args": [ "transformer_blocks.6.norm1_context.linear.weight_5_1_16_27_12_2_gma" ], "out_args": [ "/transformer_blocks.6/Add_4_output_0.out10_30" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.6/attn/Reshape_3_output_0.out22_6_bfp.out27_0_6" ], "const_args": [ "onnx::MatMul_3949" ], "out_args": [ "/transformer_blocks.6/attn/to_out.0/Add_output_0.out6_1_12_bfp.out7_12" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/attn/to_out.0/Add_output_0.out6_1_12_bfp.out7_12", "/transformer_blocks.5/Add_3_output_0.out10_25" ], "const_args": [ "transformer_blocks.6.norm1.linear.weight_5_1_17_27_13_2_gma" ], "out_args": [ "/transformer_blocks.6/Add_output_0.out10_28" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.6/Add_output_0.out10_28" ], "const_args": [ "/transformer_blocks.6/norm2/Constant_output_0", "/transformer_blocks.6/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.6/norm2/LayerNormalization_output_0.out14_26_bfp.out15_26" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/norm2/LayerNormalization_output_0.out14_26_bfp.out15_26" ], "const_args": [ "transformer_blocks.6.norm1.linear.weight_5_1_17_27_13_3_transformer_blocks.6.norm1.linear.weight_5_1_17_27_13_4" ], "out_args": [ "/transformer_blocks.6/Add_2_output_0.out0_0_26_bfp.out1_30" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.6/Add_2_output_0.out0_0_26_bfp.out1_30" ], "const_args": [ "onnx::MatMul_3951" ], "out_args": [ "/transformer_blocks.6/ff/net.0/Mul_5_output_0.out17_2_12_bfp.out25_35" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.6/ff/net.0/Mul_5_output_0.out17_2_12_bfp.out25_35" ], "const_args": [ "onnx::MatMul_3952" ], "out_args": [ "/transformer_blocks.6/ff/net.2/Add_output_0.out17_3_65_bfp.out25_37" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/ff/net.2/Add_output_0.out17_3_65_bfp.out25_37", "/transformer_blocks.6/Add_output_0.out10_28" ], "const_args": [ "transformer_blocks.6.norm1.linear.weight_5_1_17_27_13_5_gma" ], "out_args": [ "/transformer_blocks.6/Add_3_output_0.out10_29" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/controlnet_blocks.6/MatMul", "type": "SDGemm", "in_args": [ "/transformer_blocks.6/Add_3_output_0.out10_29" ], "const_args": [ "onnx::MatMul_4080" ], "out_args": [ "/controlnet_blocks.6/Add_output_0.out17_3_67" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.6/Add_4_output_0.out10_30" ], "const_args": [ "/transformer_blocks.6/norm2_context/Constant_output_0", "/transformer_blocks.6/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.6/norm2_context/LayerNormalization_output_0.out14_27_bfp.out15_27" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/norm2_context/LayerNormalization_output_0.out14_27_bfp.out15_27" ], "const_args": [ "transformer_blocks.6.norm1_context.linear.weight_5_1_16_27_12_3_transformer_blocks.6.norm1_context.linear.weight_5_1_16_27_12_4" ], "out_args": [ "/transformer_blocks.6/Add_6_output_0.out0_0_27_bfp.out1_32" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.6/Add_6_output_0.out0_0_27_bfp.out1_32" ], "const_args": [ "onnx::MatMul_3953" ], "out_args": [ "/transformer_blocks.6/ff_context/net.0/Mul_5_output_0.out17_2_13_bfp.out25_36" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.6/ff_context/net.0/Mul_5_output_0.out17_2_13_bfp.out25_36" ], "const_args": [ "onnx::MatMul_3954" ], "out_args": [ "/transformer_blocks.6/ff_context/net.2/Add_output_0.out17_3_66_bfp.out25_38" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.6/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.6/ff_context/net.2/Add_output_0.out17_3_66_bfp.out25_38", "/transformer_blocks.6/Add_4_output_0.out10_30" ], "const_args": [ "transformer_blocks.6.norm1_context.linear.weight_5_1_16_27_12_5_gma" ], "out_args": [ "/transformer_blocks.6/Add_7_output_0.out10_31" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.6/Add_3_output_0.out10_29" ], "const_args": [ "/transformer_blocks.7/norm1/norm/Constant_output_0", "/transformer_blocks.7/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.7/norm1/norm/LayerNormalization_output_0.out14_28_bfp.out15_28" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.6/Add_7_output_0.out10_31" ], "const_args": [ "/transformer_blocks.7/norm1_context/norm/Constant_output_0", "/transformer_blocks.7/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.7/norm1_context/norm/LayerNormalization_output_0.out14_29_bfp.out15_29" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/norm1/norm/LayerNormalization_output_0.out14_28_bfp.out15_28" ], "const_args": [ "transformer_blocks.7.norm1.linear.weight_5_1_19_27_15_0_transformer_blocks.7.norm1.linear.weight_5_1_19_27_15_1" ], "out_args": [ "/transformer_blocks.7/norm1/Add_2_output_0.out0_0_28_bfp.out1_33" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/norm1_context/norm/LayerNormalization_output_0.out14_29_bfp.out15_29" ], "const_args": [ "transformer_blocks.7.norm1_context.linear.weight_5_1_18_27_14_0_transformer_blocks.7.norm1_context.linear.weight_5_1_18_27_14_1" ], "out_args": [ "/transformer_blocks.7/norm1_context/Add_2_output_0.out0_0_29_bfp.out1_35" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.7/norm1_context/Add_2_output_0.out0_0_29_bfp.out1_35", "/transformer_blocks.7/norm1/Add_2_output_0.out0_0_28_bfp.out1_33" ], "const_args": [ "onnx::MatMul_3958_onnx::MatMul_3955" ], "out_args": [ "/transformer_blocks.7/attn/Concat_output_0.out22_7_bfp.out23_21" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.7/norm1_context/Add_2_output_0.out0_0_29_bfp.out1_35", "/transformer_blocks.7/norm1/Add_2_output_0.out0_0_28_bfp.out1_33" ], "const_args": [ "onnx::MatMul_3959_onnx::MatMul_3956" ], "out_args": [ "/transformer_blocks.7/attn/Concat_1_output_0.out22_7_bfp.out23_22" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.7/norm1_context/Add_2_output_0.out0_0_29_bfp.out1_35", "/transformer_blocks.7/norm1/Add_2_output_0.out0_0_28_bfp.out1_33" ], "const_args": [ "onnx::MatMul_3960_onnx::MatMul_3957" ], "out_args": [ "/transformer_blocks.7/attn/Concat_2_output_0.out22_7_bfp.out23_23" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_7", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.7/attn/Concat_output_0.out22_7_bfp.out23_21", "/transformer_blocks.7/attn/Concat_1_output_0.out22_7_bfp.out23_22", "/transformer_blocks.7/attn/Concat_2_output_0.out22_7_bfp.out23_23" ], "const_args": [], "out_args": [ "/transformer_blocks.7/attn/Reshape_3_output_0.out22_7_bfp.out27_0_7" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "64", "max_length + floor(h/2)*floor(w/2)" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.7/attn/Reshape_3_output_0.out22_7_bfp.out27_0_7" ], "const_args": [ "onnx::MatMul_3975" ], "out_args": [ "/transformer_blocks.7/attn/to_add_out/Add_output_0.out6_1_15_bfp.out7_15" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/attn/to_add_out/Add_output_0.out6_1_15_bfp.out7_15", "/transformer_blocks.6/Add_7_output_0.out10_31" ], "const_args": [ "transformer_blocks.7.norm1_context.linear.weight_5_1_18_27_14_2_gma" ], "out_args": [ "/transformer_blocks.7/Add_4_output_0.out10_34" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.7/attn/Reshape_3_output_0.out22_7_bfp.out27_0_7" ], "const_args": [ "onnx::MatMul_3974" ], "out_args": [ "/transformer_blocks.7/attn/to_out.0/Add_output_0.out6_1_14_bfp.out7_14" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/attn/to_out.0/Add_output_0.out6_1_14_bfp.out7_14", "/transformer_blocks.6/Add_3_output_0.out10_29" ], "const_args": [ "transformer_blocks.7.norm1.linear.weight_5_1_19_27_15_2_gma" ], "out_args": [ "/transformer_blocks.7/Add_output_0.out10_32" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.7/Add_output_0.out10_32" ], "const_args": [ "/transformer_blocks.7/norm2/Constant_output_0", "/transformer_blocks.7/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.7/norm2/LayerNormalization_output_0.out14_30_bfp.out15_30" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/norm2/LayerNormalization_output_0.out14_30_bfp.out15_30" ], "const_args": [ "transformer_blocks.7.norm1.linear.weight_5_1_19_27_15_3_transformer_blocks.7.norm1.linear.weight_5_1_19_27_15_4" ], "out_args": [ "/transformer_blocks.7/Add_2_output_0.out0_0_30_bfp.out1_34" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.7/Add_2_output_0.out0_0_30_bfp.out1_34" ], "const_args": [ "onnx::MatMul_3976" ], "out_args": [ "/transformer_blocks.7/ff/net.0/Mul_5_output_0.out17_2_14_bfp.out25_40" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.7/ff/net.0/Mul_5_output_0.out17_2_14_bfp.out25_40" ], "const_args": [ "onnx::MatMul_3977" ], "out_args": [ "/transformer_blocks.7/ff/net.2/Add_output_0.out17_3_74_bfp.out25_42" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/ff/net.2/Add_output_0.out17_3_74_bfp.out25_42", "/transformer_blocks.7/Add_output_0.out10_32" ], "const_args": [ "transformer_blocks.7.norm1.linear.weight_5_1_19_27_15_5_gma" ], "out_args": [ "/transformer_blocks.7/Add_3_output_0.out10_33" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/controlnet_blocks.7/MatMul", "type": "SDGemm", "in_args": [ "/transformer_blocks.7/Add_3_output_0.out10_33" ], "const_args": [ "onnx::MatMul_4081" ], "out_args": [ "/controlnet_blocks.7/Add_output_0.out17_3_76" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.7/Add_4_output_0.out10_34" ], "const_args": [ "/transformer_blocks.7/norm2_context/Constant_output_0", "/transformer_blocks.7/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.7/norm2_context/LayerNormalization_output_0.out14_31_bfp.out15_31" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/norm2_context/LayerNormalization_output_0.out14_31_bfp.out15_31" ], "const_args": [ "transformer_blocks.7.norm1_context.linear.weight_5_1_18_27_14_3_transformer_blocks.7.norm1_context.linear.weight_5_1_18_27_14_4" ], "out_args": [ "/transformer_blocks.7/Add_6_output_0.out0_0_31_bfp.out1_36" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.7/Add_6_output_0.out0_0_31_bfp.out1_36" ], "const_args": [ "onnx::MatMul_3978" ], "out_args": [ "/transformer_blocks.7/ff_context/net.0/Mul_5_output_0.out17_2_15_bfp.out25_41" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.7/ff_context/net.0/Mul_5_output_0.out17_2_15_bfp.out25_41" ], "const_args": [ "onnx::MatMul_3979" ], "out_args": [ "/transformer_blocks.7/ff_context/net.2/Add_output_0.out17_3_75_bfp.out25_43" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.7/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.7/ff_context/net.2/Add_output_0.out17_3_75_bfp.out25_43", "/transformer_blocks.7/Add_4_output_0.out10_34" ], "const_args": [ "transformer_blocks.7.norm1_context.linear.weight_5_1_18_27_14_5_gma" ], "out_args": [ "/transformer_blocks.7/Add_7_output_0.out10_35" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.7/Add_3_output_0.out10_33" ], "const_args": [ "/transformer_blocks.8/norm1/norm/Constant_output_0", "/transformer_blocks.8/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.8/norm1/norm/LayerNormalization_output_0.out14_32_bfp.out15_32" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.7/Add_7_output_0.out10_35" ], "const_args": [ "/transformer_blocks.8/norm1_context/norm/Constant_output_0", "/transformer_blocks.8/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.8/norm1_context/norm/LayerNormalization_output_0.out14_33_bfp.out15_33" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/norm1/norm/LayerNormalization_output_0.out14_32_bfp.out15_32" ], "const_args": [ "transformer_blocks.8.norm1.linear.weight_5_1_21_27_17_0_transformer_blocks.8.norm1.linear.weight_5_1_21_27_17_1" ], "out_args": [ "/transformer_blocks.8/norm1/Add_2_output_0.out0_0_32_bfp.out1_37" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/norm1_context/norm/LayerNormalization_output_0.out14_33_bfp.out15_33" ], "const_args": [ "transformer_blocks.8.norm1_context.linear.weight_5_1_20_27_16_0_transformer_blocks.8.norm1_context.linear.weight_5_1_20_27_16_1" ], "out_args": [ "/transformer_blocks.8/norm1_context/Add_2_output_0.out0_0_33_bfp.out1_39" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.8/norm1_context/Add_2_output_0.out0_0_33_bfp.out1_39", "/transformer_blocks.8/norm1/Add_2_output_0.out0_0_32_bfp.out1_37" ], "const_args": [ "onnx::MatMul_3983_onnx::MatMul_3980" ], "out_args": [ "/transformer_blocks.8/attn/Concat_output_0.out22_8_bfp.out23_24" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.8/norm1_context/Add_2_output_0.out0_0_33_bfp.out1_39", "/transformer_blocks.8/norm1/Add_2_output_0.out0_0_32_bfp.out1_37" ], "const_args": [ "onnx::MatMul_3984_onnx::MatMul_3981" ], "out_args": [ "/transformer_blocks.8/attn/Concat_1_output_0.out22_8_bfp.out23_25" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.8/norm1_context/Add_2_output_0.out0_0_33_bfp.out1_39", "/transformer_blocks.8/norm1/Add_2_output_0.out0_0_32_bfp.out1_37" ], "const_args": [ "onnx::MatMul_3985_onnx::MatMul_3982" ], "out_args": [ "/transformer_blocks.8/attn/Concat_2_output_0.out22_8_bfp.out23_26" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_8", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.8/attn/Concat_output_0.out22_8_bfp.out23_24", "/transformer_blocks.8/attn/Concat_1_output_0.out22_8_bfp.out23_25", "/transformer_blocks.8/attn/Concat_2_output_0.out22_8_bfp.out23_26" ], "const_args": [], "out_args": [ "/transformer_blocks.8/attn/Reshape_3_output_0.out22_8_bfp.out27_0_8" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "64", "max_length + floor(h/2)*floor(w/2)" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.8/attn/Reshape_3_output_0.out22_8_bfp.out27_0_8" ], "const_args": [ "onnx::MatMul_4000" ], "out_args": [ "/transformer_blocks.8/attn/to_add_out/Add_output_0.out6_1_17_bfp.out7_17" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/attn/to_add_out/Add_output_0.out6_1_17_bfp.out7_17", "/transformer_blocks.7/Add_7_output_0.out10_35" ], "const_args": [ "transformer_blocks.8.norm1_context.linear.weight_5_1_20_27_16_2_gma" ], "out_args": [ "/transformer_blocks.8/Add_4_output_0.out10_38" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.8/attn/Reshape_3_output_0.out22_8_bfp.out27_0_8" ], "const_args": [ "onnx::MatMul_3999" ], "out_args": [ "/transformer_blocks.8/attn/to_out.0/Add_output_0.out6_1_16_bfp.out7_16" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/attn/to_out.0/Add_output_0.out6_1_16_bfp.out7_16", "/transformer_blocks.7/Add_3_output_0.out10_33" ], "const_args": [ "transformer_blocks.8.norm1.linear.weight_5_1_21_27_17_2_gma" ], "out_args": [ "/transformer_blocks.8/Add_output_0.out10_36" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.8/Add_output_0.out10_36" ], "const_args": [ "/transformer_blocks.8/norm2/Constant_output_0", "/transformer_blocks.8/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.8/norm2/LayerNormalization_output_0.out14_34_bfp.out15_34" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/norm2/LayerNormalization_output_0.out14_34_bfp.out15_34" ], "const_args": [ "transformer_blocks.8.norm1.linear.weight_5_1_21_27_17_3_transformer_blocks.8.norm1.linear.weight_5_1_21_27_17_4" ], "out_args": [ "/transformer_blocks.8/Add_2_output_0.out0_0_34_bfp.out1_38" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.8/Add_2_output_0.out0_0_34_bfp.out1_38" ], "const_args": [ "onnx::MatMul_4001" ], "out_args": [ "/transformer_blocks.8/ff/net.0/Mul_5_output_0.out17_2_16_bfp.out25_45" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.8/ff/net.0/Mul_5_output_0.out17_2_16_bfp.out25_45" ], "const_args": [ "onnx::MatMul_4002" ], "out_args": [ "/transformer_blocks.8/ff/net.2/Add_output_0.out17_3_83_bfp.out25_47" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/ff/net.2/Add_output_0.out17_3_83_bfp.out25_47", "/transformer_blocks.8/Add_output_0.out10_36" ], "const_args": [ "transformer_blocks.8.norm1.linear.weight_5_1_21_27_17_5_gma" ], "out_args": [ "/transformer_blocks.8/Add_3_output_0.out10_37" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/controlnet_blocks.8/MatMul", "type": "SDGemm", "in_args": [ "/transformer_blocks.8/Add_3_output_0.out10_37" ], "const_args": [ "onnx::MatMul_4082" ], "out_args": [ "/controlnet_blocks.8/Add_output_0.out17_3_85" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.8/Add_4_output_0.out10_38" ], "const_args": [ "/transformer_blocks.8/norm2_context/Constant_output_0", "/transformer_blocks.8/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.8/norm2_context/LayerNormalization_output_0.out14_35_bfp.out15_35" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/norm2_context/LayerNormalization_output_0.out14_35_bfp.out15_35" ], "const_args": [ "transformer_blocks.8.norm1_context.linear.weight_5_1_20_27_16_3_transformer_blocks.8.norm1_context.linear.weight_5_1_20_27_16_4" ], "out_args": [ "/transformer_blocks.8/Add_6_output_0.out0_0_35_bfp.out1_40" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.8/Add_6_output_0.out0_0_35_bfp.out1_40" ], "const_args": [ "onnx::MatMul_4003" ], "out_args": [ "/transformer_blocks.8/ff_context/net.0/Mul_5_output_0.out17_2_17_bfp.out25_46" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.8/ff_context/net.0/Mul_5_output_0.out17_2_17_bfp.out25_46" ], "const_args": [ "onnx::MatMul_4004" ], "out_args": [ "/transformer_blocks.8/ff_context/net.2/Add_output_0.out17_3_84_bfp.out25_48" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.8/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.8/ff_context/net.2/Add_output_0.out17_3_84_bfp.out25_48", "/transformer_blocks.8/Add_4_output_0.out10_38" ], "const_args": [ "transformer_blocks.8.norm1_context.linear.weight_5_1_20_27_16_5_gma" ], "out_args": [ "/transformer_blocks.8/Add_7_output_0.out10_39" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.8/Add_3_output_0.out10_37" ], "const_args": [ "/transformer_blocks.9/norm1/norm/Constant_output_0", "/transformer_blocks.9/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.9/norm1/norm/LayerNormalization_output_0.out14_36_bfp.out15_36" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.8/Add_7_output_0.out10_39" ], "const_args": [ "/transformer_blocks.9/norm1_context/norm/Constant_output_0", "/transformer_blocks.9/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.9/norm1_context/norm/LayerNormalization_output_0.out14_37_bfp.out15_37" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/norm1/norm/LayerNormalization_output_0.out14_36_bfp.out15_36" ], "const_args": [ "transformer_blocks.9.norm1.linear.weight_5_1_23_27_19_0_transformer_blocks.9.norm1.linear.weight_5_1_23_27_19_1" ], "out_args": [ "/transformer_blocks.9/norm1/Add_2_output_0.out0_0_36_bfp.out1_41" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/norm1_context/norm/LayerNormalization_output_0.out14_37_bfp.out15_37" ], "const_args": [ "transformer_blocks.9.norm1_context.linear.weight_5_1_22_27_18_0_transformer_blocks.9.norm1_context.linear.weight_5_1_22_27_18_1" ], "out_args": [ "/transformer_blocks.9/norm1_context/Add_2_output_0.out0_0_37_bfp.out1_43" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.9/norm1_context/Add_2_output_0.out0_0_37_bfp.out1_43", "/transformer_blocks.9/norm1/Add_2_output_0.out0_0_36_bfp.out1_41" ], "const_args": [ "onnx::MatMul_4008_onnx::MatMul_4005" ], "out_args": [ "/transformer_blocks.9/attn/Concat_output_0.out22_9_bfp.out23_27" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.9/norm1_context/Add_2_output_0.out0_0_37_bfp.out1_43", "/transformer_blocks.9/norm1/Add_2_output_0.out0_0_36_bfp.out1_41" ], "const_args": [ "onnx::MatMul_4009_onnx::MatMul_4006" ], "out_args": [ "/transformer_blocks.9/attn/Concat_1_output_0.out22_9_bfp.out23_28" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.9/norm1_context/Add_2_output_0.out0_0_37_bfp.out1_43", "/transformer_blocks.9/norm1/Add_2_output_0.out0_0_36_bfp.out1_41" ], "const_args": [ "onnx::MatMul_4010_onnx::MatMul_4007" ], "out_args": [ "/transformer_blocks.9/attn/Concat_2_output_0.out22_9_bfp.out23_29" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_9", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.9/attn/Concat_output_0.out22_9_bfp.out23_27", "/transformer_blocks.9/attn/Concat_1_output_0.out22_9_bfp.out23_28", "/transformer_blocks.9/attn/Concat_2_output_0.out22_9_bfp.out23_29" ], "const_args": [], "out_args": [ "/transformer_blocks.9/attn/Reshape_3_output_0.out22_9_bfp.out27_0_9" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "64", "max_length + floor(h/2)*floor(w/2)" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.9/attn/Reshape_3_output_0.out22_9_bfp.out27_0_9" ], "const_args": [ "onnx::MatMul_4025" ], "out_args": [ "/transformer_blocks.9/attn/to_add_out/Add_output_0.out6_1_19_bfp.out7_19" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/attn/to_add_out/Add_output_0.out6_1_19_bfp.out7_19", "/transformer_blocks.8/Add_7_output_0.out10_39" ], "const_args": [ "transformer_blocks.9.norm1_context.linear.weight_5_1_22_27_18_2_gma" ], "out_args": [ "/transformer_blocks.9/Add_4_output_0.out10_42" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.9/attn/Reshape_3_output_0.out22_9_bfp.out27_0_9" ], "const_args": [ "onnx::MatMul_4024" ], "out_args": [ "/transformer_blocks.9/attn/to_out.0/Add_output_0.out6_1_18_bfp.out7_18" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/attn/to_out.0/Add_output_0.out6_1_18_bfp.out7_18", "/transformer_blocks.8/Add_3_output_0.out10_37" ], "const_args": [ "transformer_blocks.9.norm1.linear.weight_5_1_23_27_19_2_gma" ], "out_args": [ "/transformer_blocks.9/Add_output_0.out10_40" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.9/Add_output_0.out10_40" ], "const_args": [ "/transformer_blocks.9/norm2/Constant_output_0", "/transformer_blocks.9/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.9/norm2/LayerNormalization_output_0.out14_38_bfp.out15_38" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/norm2/LayerNormalization_output_0.out14_38_bfp.out15_38" ], "const_args": [ "transformer_blocks.9.norm1.linear.weight_5_1_23_27_19_3_transformer_blocks.9.norm1.linear.weight_5_1_23_27_19_4" ], "out_args": [ "/transformer_blocks.9/Add_2_output_0.out0_0_38_bfp.out1_42" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.9/Add_2_output_0.out0_0_38_bfp.out1_42" ], "const_args": [ "onnx::MatMul_4026" ], "out_args": [ "/transformer_blocks.9/ff/net.0/Mul_5_output_0.out17_2_18_bfp.out25_50" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.9/ff/net.0/Mul_5_output_0.out17_2_18_bfp.out25_50" ], "const_args": [ "onnx::MatMul_4027" ], "out_args": [ "/transformer_blocks.9/ff/net.2/Add_output_0.out17_3_92_bfp.out25_52" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/ff/net.2/Add_output_0.out17_3_92_bfp.out25_52", "/transformer_blocks.9/Add_output_0.out10_40" ], "const_args": [ "transformer_blocks.9.norm1.linear.weight_5_1_23_27_19_5_gma" ], "out_args": [ "/transformer_blocks.9/Add_3_output_0.out10_41" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.9/Add_3_output_0.out10_41" ], "const_args": [ "/transformer_blocks.10/norm1/norm/Constant_output_0", "/transformer_blocks.10/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.10/norm1/norm/LayerNormalization_output_0.out14_40_bfp.out15_40" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/norm1/norm/LayerNormalization_output_0.out14_40_bfp.out15_40" ], "const_args": [ "transformer_blocks.10.norm1.linear.weight_5_1_25_27_21_0_transformer_blocks.10.norm1.linear.weight_5_1_25_27_21_1" ], "out_args": [ "/transformer_blocks.10/norm1/Add_2_output_0.out0_0_40_bfp.out1_6" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/controlnet_blocks.9/MatMul", "type": "SDGemm", "in_args": [ "/transformer_blocks.9/Add_3_output_0.out10_41" ], "const_args": [ "onnx::MatMul_4083" ], "out_args": [ "/controlnet_blocks.9/Add_output_0.out17_3_94" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.9/Add_4_output_0.out10_42" ], "const_args": [ "/transformer_blocks.9/norm2_context/Constant_output_0", "/transformer_blocks.9/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.9/norm2_context/LayerNormalization_output_0.out14_39_bfp.out15_39" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/norm2_context/LayerNormalization_output_0.out14_39_bfp.out15_39" ], "const_args": [ "transformer_blocks.9.norm1_context.linear.weight_5_1_22_27_18_3_transformer_blocks.9.norm1_context.linear.weight_5_1_22_27_18_4" ], "out_args": [ "/transformer_blocks.9/Add_6_output_0.out0_0_39_bfp.out1_44" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.9/Add_6_output_0.out0_0_39_bfp.out1_44" ], "const_args": [ "onnx::MatMul_4028" ], "out_args": [ "/transformer_blocks.9/ff_context/net.0/Mul_5_output_0.out17_2_19_bfp.out25_51" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.9/ff_context/net.0/Mul_5_output_0.out17_2_19_bfp.out25_51" ], "const_args": [ "onnx::MatMul_4029" ], "out_args": [ "/transformer_blocks.9/ff_context/net.2/Add_output_0.out17_3_93_bfp.out25_53" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.9/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.9/ff_context/net.2/Add_output_0.out17_3_93_bfp.out25_53", "/transformer_blocks.9/Add_4_output_0.out10_42" ], "const_args": [ "transformer_blocks.9.norm1_context.linear.weight_5_1_22_27_18_5_gma" ], "out_args": [ "/transformer_blocks.9/Add_7_output_0.out10_43" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.9/Add_7_output_0.out10_43" ], "const_args": [ "/transformer_blocks.10/norm1_context/norm/Constant_output_0", "/transformer_blocks.10/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.10/norm1_context/norm/LayerNormalization_output_0.out14_41_bfp.out15_41" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/norm1_context/norm/LayerNormalization_output_0.out14_41_bfp.out15_41" ], "const_args": [ "transformer_blocks.10.norm1_context.linear.weight_5_1_24_27_20_0_transformer_blocks.10.norm1_context.linear.weight_5_1_24_27_20_1" ], "out_args": [ "/transformer_blocks.10/norm1_context/Add_2_output_0.out0_0_41_bfp.out1_8" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.10/norm1_context/Add_2_output_0.out0_0_41_bfp.out1_8", "/transformer_blocks.10/norm1/Add_2_output_0.out0_0_40_bfp.out1_6" ], "const_args": [ "onnx::MatMul_4033_onnx::MatMul_4030" ], "out_args": [ "/transformer_blocks.10/attn/Concat_output_0.out22_10_bfp.out23_30" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.10/norm1_context/Add_2_output_0.out0_0_41_bfp.out1_8", "/transformer_blocks.10/norm1/Add_2_output_0.out0_0_40_bfp.out1_6" ], "const_args": [ "onnx::MatMul_4034_onnx::MatMul_4031" ], "out_args": [ "/transformer_blocks.10/attn/Concat_1_output_0.out22_10_bfp.out23_31" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.10/norm1_context/Add_2_output_0.out0_0_41_bfp.out1_8", "/transformer_blocks.10/norm1/Add_2_output_0.out0_0_40_bfp.out1_6" ], "const_args": [ "onnx::MatMul_4035_onnx::MatMul_4032" ], "out_args": [ "/transformer_blocks.10/attn/Concat_2_output_0.out22_10_bfp.out23_32" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_10", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.10/attn/Concat_output_0.out22_10_bfp.out23_30", "/transformer_blocks.10/attn/Concat_1_output_0.out22_10_bfp.out23_31", "/transformer_blocks.10/attn/Concat_2_output_0.out22_10_bfp.out23_32" ], "const_args": [], "out_args": [ "/transformer_blocks.10/attn/Reshape_3_output_0.out22_10_bfp.out27_0_10" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "64", "max_length + floor(h/2)*floor(w/2)" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.10/attn/Reshape_3_output_0.out22_10_bfp.out27_0_10" ], "const_args": [ "onnx::MatMul_4049" ], "out_args": [ "/transformer_blocks.10/attn/to_out.0/Add_output_0.out6_1_20_bfp.out7_20" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/attn/to_out.0/Add_output_0.out6_1_20_bfp.out7_20", "/transformer_blocks.9/Add_3_output_0.out10_41" ], "const_args": [ "transformer_blocks.10.norm1.linear.weight_5_1_25_27_21_2_gma" ], "out_args": [ "/transformer_blocks.10/Add_output_0.out10_6" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.10/Add_output_0.out10_6" ], "const_args": [ "/transformer_blocks.10/norm2/Constant_output_0", "/transformer_blocks.10/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.10/norm2/LayerNormalization_output_0.out14_42_bfp.out15_42" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/norm2/LayerNormalization_output_0.out14_42_bfp.out15_42" ], "const_args": [ "transformer_blocks.10.norm1.linear.weight_5_1_25_27_21_3_transformer_blocks.10.norm1.linear.weight_5_1_25_27_21_4" ], "out_args": [ "/transformer_blocks.10/Add_2_output_0.out0_0_42_bfp.out1_7" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.10/Add_2_output_0.out0_0_42_bfp.out1_7" ], "const_args": [ "onnx::MatMul_4051" ], "out_args": [ "/transformer_blocks.10/ff/net.0/Mul_5_output_0.out17_2_20_bfp.out25_55" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.10/ff/net.0/Mul_5_output_0.out17_2_20_bfp.out25_55" ], "const_args": [ "onnx::MatMul_4052" ], "out_args": [ "/transformer_blocks.10/ff/net.2/Add_output_0.out17_3_101_bfp.out25_57" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/ff/net.2/Add_output_0.out17_3_101_bfp.out25_57", "/transformer_blocks.10/Add_output_0.out10_6" ], "const_args": [ "transformer_blocks.10.norm1.linear.weight_5_1_25_27_21_5_gma" ], "out_args": [ "/transformer_blocks.10/Add_3_output_0.out10_7" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/controlnet_blocks.10/MatMul", "type": "SDGemm", "in_args": [ "/transformer_blocks.10/Add_3_output_0.out10_7" ], "const_args": [ "onnx::MatMul_4084" ], "out_args": [ "/controlnet_blocks.10/Add_output_0.out17_3_103" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.10/attn/Reshape_3_output_0.out22_10_bfp.out27_0_10" ], "const_args": [ "onnx::MatMul_4050" ], "out_args": [ "/transformer_blocks.10/attn/to_add_out/Add_output_0.out6_1_21_bfp.out7_21" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/attn/to_add_out/Add_output_0.out6_1_21_bfp.out7_21", "/transformer_blocks.9/Add_7_output_0.out10_43" ], "const_args": [ "transformer_blocks.10.norm1_context.linear.weight_5_1_24_27_20_2_gma" ], "out_args": [ "/transformer_blocks.10/Add_4_output_0.out10_8" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.10/Add_4_output_0.out10_8" ], "const_args": [ "/transformer_blocks.10/norm2_context/Constant_output_0", "/transformer_blocks.10/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.10/norm2_context/LayerNormalization_output_0.out14_43_bfp.out15_43" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/norm2_context/LayerNormalization_output_0.out14_43_bfp.out15_43" ], "const_args": [ "transformer_blocks.10.norm1_context.linear.weight_5_1_24_27_20_3_transformer_blocks.10.norm1_context.linear.weight_5_1_24_27_20_4" ], "out_args": [ "/transformer_blocks.10/Add_6_output_0.out0_0_43_bfp.out1_9" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.10/Add_6_output_0.out0_0_43_bfp.out1_9" ], "const_args": [ "onnx::MatMul_4053" ], "out_args": [ "/transformer_blocks.10/ff_context/net.0/Mul_5_output_0.out17_2_21_bfp.out25_56" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.10/ff_context/net.0/Mul_5_output_0.out17_2_21_bfp.out25_56" ], "const_args": [ "onnx::MatMul_4054" ], "out_args": [ "/transformer_blocks.10/ff_context/net.2/Add_output_0.out17_3_102_bfp.out25_58" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.10/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.10/ff_context/net.2/Add_output_0.out17_3_102_bfp.out25_58", "/transformer_blocks.10/Add_4_output_0.out10_8" ], "const_args": [ "transformer_blocks.10.norm1_context.linear.weight_5_1_24_27_20_5_gma" ], "out_args": [ "/transformer_blocks.10/Add_7_output_0.out10_9" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.10/Add_3_output_0.out10_7" ], "const_args": [ "/transformer_blocks.11/norm1/norm/Constant_output_0", "/transformer_blocks.11/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.11/norm1/norm/LayerNormalization_output_0.out14_44_bfp.out15_44" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/norm1/norm/LayerNormalization_output_0.out14_44_bfp.out15_44" ], "const_args": [ "transformer_blocks.11.norm1.linear.weight_5_1_27_27_23_0_transformer_blocks.11.norm1.linear.weight_5_1_27_27_23_1" ], "out_args": [ "/transformer_blocks.11/norm1/Add_2_output_0.out0_0_44_bfp.out1_10" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.10/Add_7_output_0.out10_9" ], "const_args": [ "/transformer_blocks.11/norm1_context/norm/Constant_output_0", "/transformer_blocks.11/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.11/norm1_context/norm/LayerNormalization_output_0.out14_45_bfp.out15_45" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/norm1_context/norm/LayerNormalization_output_0.out14_45_bfp.out15_45" ], "const_args": [ "transformer_blocks.11.norm1_context.linear.weight_5_1_26_27_22_0_transformer_blocks.11.norm1_context.linear.weight_5_1_26_27_22_1" ], "out_args": [ "/transformer_blocks.11/norm1_context/Add_2_output_0.out0_0_45_bfp.out1_12" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.11/norm1_context/Add_2_output_0.out0_0_45_bfp.out1_12", "/transformer_blocks.11/norm1/Add_2_output_0.out0_0_44_bfp.out1_10" ], "const_args": [ "onnx::MatMul_4058_onnx::MatMul_4055" ], "out_args": [ "/transformer_blocks.11/attn/Concat_output_0.out22_11_bfp.out23_33" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.11/norm1_context/Add_2_output_0.out0_0_45_bfp.out1_12", "/transformer_blocks.11/norm1/Add_2_output_0.out0_0_44_bfp.out1_10" ], "const_args": [ "onnx::MatMul_4059_onnx::MatMul_4056" ], "out_args": [ "/transformer_blocks.11/attn/Concat_1_output_0.out22_11_bfp.out23_34" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.11/norm1_context/Add_2_output_0.out0_0_45_bfp.out1_12", "/transformer_blocks.11/norm1/Add_2_output_0.out0_0_44_bfp.out1_10" ], "const_args": [ "onnx::MatMul_4060_onnx::MatMul_4057" ], "out_args": [ "/transformer_blocks.11/attn/Concat_2_output_0.out22_11_bfp.out23_35" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "MultiHeadAttention_11", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.11/attn/Concat_output_0.out22_11_bfp.out23_33", "/transformer_blocks.11/attn/Concat_1_output_0.out22_11_bfp.out23_34", "/transformer_blocks.11/attn/Concat_2_output_0.out22_11_bfp.out23_35" ], "const_args": [], "out_args": [ "/transformer_blocks.11/attn/Reshape_3_output_0.out22_11_bfp.out27_0_11" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "64", "max_length + floor(h/2)*floor(w/2)" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.11/attn/Reshape_3_output_0.out22_11_bfp.out27_0_11" ], "const_args": [ "onnx::MatMul_4071" ], "out_args": [ "/transformer_blocks.11/attn/to_out.0/Add_output_0.out6_1_22_bfp.out7_22" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/attn/to_out.0/Add_output_0.out6_1_22_bfp.out7_22", "/transformer_blocks.10/Add_3_output_0.out10_7" ], "const_args": [ "transformer_blocks.11.norm1.linear.weight_5_1_27_27_23_2_gma" ], "out_args": [ "/transformer_blocks.11/Add_output_0.out10_10" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.11/Add_output_0.out10_10" ], "const_args": [ "/transformer_blocks.11/norm2/Constant_output_0", "/transformer_blocks.11/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.11/norm2/LayerNormalization_output_0.out14_46_bfp.out15_46" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/norm2/LayerNormalization_output_0.out14_46_bfp.out15_46" ], "const_args": [ "transformer_blocks.11.norm1.linear.weight_5_1_27_27_23_3_transformer_blocks.11.norm1.linear.weight_5_1_27_27_23_4" ], "out_args": [ "/transformer_blocks.11/Add_2_output_0.out0_0_46_bfp.out1_11" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.11/Add_2_output_0.out0_0_46_bfp.out1_11" ], "const_args": [ "onnx::MatMul_4072" ], "out_args": [ "/transformer_blocks.11/ff/net.0/Mul_5_output_0.out17_2_22_bfp.out25_60" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.11/ff/net.0/Mul_5_output_0.out17_2_22_bfp.out25_60" ], "const_args": [ "onnx::MatMul_4073" ], "out_args": [ "/transformer_blocks.11/ff/net.2/Add_output_0.out17_3_110_bfp.out25_61" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.11/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.11/ff/net.2/Add_output_0.out17_3_110_bfp.out25_61", "/transformer_blocks.11/Add_output_0.out10_10" ], "const_args": [ "transformer_blocks.11.norm1.linear.weight_5_1_27_27_23_5_gma" ], "out_args": [ "/transformer_blocks.11/Add_3_output_0.out10_11" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/controlnet_blocks.11/MatMul", "type": "SDGemm", "in_args": [ "/transformer_blocks.11/Add_3_output_0.out10_11" ], "const_args": [ "onnx::MatMul_4085" ], "out_args": [ "/controlnet_blocks.11/Add_output_0.out17_3_111" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } } ], "fused_tensors": { "in": { "buffer_size": 15936, "xrt_arg_id": 0, "packed_tensors": [ "hidden_states_nhwc.out5_0_0", "/pos_embed/Reshape_1_output_0.out_35_1_2", "controlnet_cond_nhwc.out5_0_1", "/time_text_embed/Cast_output_0.out17_3_3", "pooled_projections.out17_3_1", "encoder_hidden_states.out17_3_0" ] }, "out": { "buffer_size": 36864, "xrt_arg_id": 1, "packed_tensors": [ "/controlnet_blocks.0/Add_output_0.out17_3_13", "/controlnet_blocks.1/Add_output_0.out17_3_22", "/controlnet_blocks.2/Add_output_0.out17_3_31", "/controlnet_blocks.3/Add_output_0.out17_3_40", "/controlnet_blocks.4/Add_output_0.out17_3_49", "/controlnet_blocks.5/Add_output_0.out17_3_58", "/controlnet_blocks.6/Add_output_0.out17_3_67", "/controlnet_blocks.7/Add_output_0.out17_3_76", "/controlnet_blocks.8/Add_output_0.out17_3_85", "/controlnet_blocks.9/Add_output_0.out17_3_94", "/controlnet_blocks.10/Add_output_0.out17_3_103", "/controlnet_blocks.11/Add_output_0.out17_3_111" ] }, "scratch": { "buffer_size": 616960, "xrt_arg_id": 2, "packed_tensors": [ "/pos_embed/Transpose_output_0.out5_0_0", "/pos_embed/Add_2_output_0.out_35_1_2", "/pos_embed_input/Transpose_output_0.out5_0_1", "/Add_output_0.out_35_1_3", "/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3", "/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1", "/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4", "/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1", "/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0", "/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2", "/time_text_embed/Add_output_0.out_35_1_4", "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "encoder_hidden_states.out17_3_0_bfp.out25_0", "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0", "/context_embedder/Add_output_0.out17_3_0", "/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0", "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0", "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1", "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_45", "/transformer_blocks.0/attn/Concat_output_0.out22_0_bfp.out23_0", "/transformer_blocks.0/attn/Concat_1_output_0.out22_0_bfp.out23_1", "/transformer_blocks.0/attn/Concat_2_output_0.out22_0_bfp.out23_2", "/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0", "/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1", "/transformer_blocks.0/Add_4_output_0.out10_0", "/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3", "/transformer_blocks.0/Add_6_output_0.out0_0_3_bfp.out1_1", "/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_6", "/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_12_bfp.out25_8", "/transformer_blocks.0/Add_7_output_0.out10_1", "/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0", "/transformer_blocks.0/Add_output_0.out10_44", "/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2", "/transformer_blocks.0/Add_2_output_0.out0_0_2_bfp.out1_46", "/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_5", "/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_11_bfp.out25_7", "/transformer_blocks.0/Add_3_output_0.out10_45", "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_4_bfp.out15_4", "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2", "/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_5_bfp.out15_5", "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4", "/transformer_blocks.1/attn/Concat_output_0.out22_1_bfp.out23_3", "/transformer_blocks.1/attn/Concat_1_output_0.out22_1_bfp.out23_4", "/transformer_blocks.1/attn/Concat_2_output_0.out22_1_bfp.out23_5", "/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1", "/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2", "/transformer_blocks.1/Add_output_0.out10_2", "/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6", "/transformer_blocks.1/Add_2_output_0.out0_0_6_bfp.out1_3", "/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_10", "/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_20_bfp.out25_12", "/transformer_blocks.1/Add_3_output_0.out10_3", "/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3", "/transformer_blocks.1/Add_4_output_0.out10_4", "/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7", "/transformer_blocks.1/Add_6_output_0.out0_0_7_bfp.out1_5", "/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_11", "/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_21_bfp.out25_13", "/transformer_blocks.1/Add_7_output_0.out10_5", "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_8_bfp.out15_8", "/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_9_bfp.out15_9", "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_13", "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_15", "/transformer_blocks.2/attn/Concat_output_0.out22_2_bfp.out23_6", "/transformer_blocks.2/attn/Concat_1_output_0.out22_2_bfp.out23_7", "/transformer_blocks.2/attn/Concat_2_output_0.out22_2_bfp.out23_8", "/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2", "/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5", "/transformer_blocks.2/Add_4_output_0.out10_14", "/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4", "/transformer_blocks.2/Add_output_0.out10_12", "/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10", "/transformer_blocks.2/Add_2_output_0.out0_0_10_bfp.out1_14", "/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_15", "/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_29_bfp.out25_17", "/transformer_blocks.2/Add_3_output_0.out10_13", "/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11", "/transformer_blocks.2/Add_6_output_0.out0_0_11_bfp.out1_16", "/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_16", "/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_30_bfp.out25_18", "/transformer_blocks.2/Add_7_output_0.out10_15", "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_12_bfp.out15_12", "/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_13_bfp.out15_13", "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_17", "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_19", "/transformer_blocks.3/attn/Concat_output_0.out22_3_bfp.out23_9", "/transformer_blocks.3/attn/Concat_1_output_0.out22_3_bfp.out23_10", "/transformer_blocks.3/attn/Concat_2_output_0.out22_3_bfp.out23_11", "/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3", "/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7", "/transformer_blocks.3/Add_4_output_0.out10_18", "/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6", "/transformer_blocks.3/Add_output_0.out10_16", "/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14", "/transformer_blocks.3/Add_2_output_0.out0_0_14_bfp.out1_18", "/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_20", "/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_38_bfp.out25_22", "/transformer_blocks.3/Add_3_output_0.out10_17", "/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15", "/transformer_blocks.3/Add_6_output_0.out0_0_15_bfp.out1_20", "/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_21", "/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_39_bfp.out25_23", "/transformer_blocks.3/Add_7_output_0.out10_19", "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_16_bfp.out15_16", "/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_17_bfp.out15_17", "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_21", "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_23", "/transformer_blocks.4/attn/Concat_output_0.out22_4_bfp.out23_12", "/transformer_blocks.4/attn/Concat_1_output_0.out22_4_bfp.out23_13", "/transformer_blocks.4/attn/Concat_2_output_0.out22_4_bfp.out23_14", "/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4", "/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9", "/transformer_blocks.4/Add_4_output_0.out10_22", "/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8", "/transformer_blocks.4/Add_output_0.out10_20", "/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18", "/transformer_blocks.4/Add_2_output_0.out0_0_18_bfp.out1_22", "/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_25", "/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_47_bfp.out25_27", "/transformer_blocks.4/Add_3_output_0.out10_21", "/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19", "/transformer_blocks.4/Add_6_output_0.out0_0_19_bfp.out1_24", "/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_26", "/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_48_bfp.out25_28", "/transformer_blocks.4/Add_7_output_0.out10_23", "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_20_bfp.out15_20", "/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_21_bfp.out15_21", "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_25", "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_27", "/transformer_blocks.5/attn/Concat_output_0.out22_5_bfp.out23_15", "/transformer_blocks.5/attn/Concat_1_output_0.out22_5_bfp.out23_16", "/transformer_blocks.5/attn/Concat_2_output_0.out22_5_bfp.out23_17", "/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5", "/transformer_blocks.5/attn/to_add_out/Add_output_0.out6_1_11_bfp.out7_11", "/transformer_blocks.5/Add_4_output_0.out10_26", "/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10", "/transformer_blocks.5/Add_output_0.out10_24", "/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22", "/transformer_blocks.5/Add_2_output_0.out0_0_22_bfp.out1_26", "/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_30", "/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_56_bfp.out25_32", "/transformer_blocks.5/Add_3_output_0.out10_25", "/transformer_blocks.5/norm2_context/LayerNormalization_output_0.out14_23_bfp.out15_23", "/transformer_blocks.5/Add_6_output_0.out0_0_23_bfp.out1_28", "/transformer_blocks.5/ff_context/net.0/Mul_5_output_0.out17_2_11_bfp.out25_31", "/transformer_blocks.5/ff_context/net.2/Add_output_0.out17_3_57_bfp.out25_33", "/transformer_blocks.5/Add_7_output_0.out10_27", "/transformer_blocks.6/norm1/norm/LayerNormalization_output_0.out14_24_bfp.out15_24", "/transformer_blocks.6/norm1_context/norm/LayerNormalization_output_0.out14_25_bfp.out15_25", "/transformer_blocks.6/norm1/Add_2_output_0.out0_0_24_bfp.out1_29", "/transformer_blocks.6/norm1_context/Add_2_output_0.out0_0_25_bfp.out1_31", "/transformer_blocks.6/attn/Concat_output_0.out22_6_bfp.out23_18", "/transformer_blocks.6/attn/Concat_1_output_0.out22_6_bfp.out23_19", "/transformer_blocks.6/attn/Concat_2_output_0.out22_6_bfp.out23_20", "/transformer_blocks.6/attn/Reshape_3_output_0.out22_6_bfp.out27_0_6", "/transformer_blocks.6/attn/to_add_out/Add_output_0.out6_1_13_bfp.out7_13", "/transformer_blocks.6/Add_4_output_0.out10_30", "/transformer_blocks.6/attn/to_out.0/Add_output_0.out6_1_12_bfp.out7_12", "/transformer_blocks.6/Add_output_0.out10_28", "/transformer_blocks.6/norm2/LayerNormalization_output_0.out14_26_bfp.out15_26", "/transformer_blocks.6/Add_2_output_0.out0_0_26_bfp.out1_30", "/transformer_blocks.6/ff/net.0/Mul_5_output_0.out17_2_12_bfp.out25_35", "/transformer_blocks.6/ff/net.2/Add_output_0.out17_3_65_bfp.out25_37", "/transformer_blocks.6/Add_3_output_0.out10_29", "/transformer_blocks.6/norm2_context/LayerNormalization_output_0.out14_27_bfp.out15_27", "/transformer_blocks.6/Add_6_output_0.out0_0_27_bfp.out1_32", "/transformer_blocks.6/ff_context/net.0/Mul_5_output_0.out17_2_13_bfp.out25_36", "/transformer_blocks.6/ff_context/net.2/Add_output_0.out17_3_66_bfp.out25_38", "/transformer_blocks.6/Add_7_output_0.out10_31", "/transformer_blocks.7/norm1/norm/LayerNormalization_output_0.out14_28_bfp.out15_28", "/transformer_blocks.7/norm1_context/norm/LayerNormalization_output_0.out14_29_bfp.out15_29", "/transformer_blocks.7/norm1/Add_2_output_0.out0_0_28_bfp.out1_33", "/transformer_blocks.7/norm1_context/Add_2_output_0.out0_0_29_bfp.out1_35", "/transformer_blocks.7/attn/Concat_output_0.out22_7_bfp.out23_21", "/transformer_blocks.7/attn/Concat_1_output_0.out22_7_bfp.out23_22", "/transformer_blocks.7/attn/Concat_2_output_0.out22_7_bfp.out23_23", "/transformer_blocks.7/attn/Reshape_3_output_0.out22_7_bfp.out27_0_7", "/transformer_blocks.7/attn/to_add_out/Add_output_0.out6_1_15_bfp.out7_15", "/transformer_blocks.7/Add_4_output_0.out10_34", "/transformer_blocks.7/attn/to_out.0/Add_output_0.out6_1_14_bfp.out7_14", "/transformer_blocks.7/Add_output_0.out10_32", "/transformer_blocks.7/norm2/LayerNormalization_output_0.out14_30_bfp.out15_30", "/transformer_blocks.7/Add_2_output_0.out0_0_30_bfp.out1_34", "/transformer_blocks.7/ff/net.0/Mul_5_output_0.out17_2_14_bfp.out25_40", "/transformer_blocks.7/ff/net.2/Add_output_0.out17_3_74_bfp.out25_42", "/transformer_blocks.7/Add_3_output_0.out10_33", "/transformer_blocks.7/norm2_context/LayerNormalization_output_0.out14_31_bfp.out15_31", "/transformer_blocks.7/Add_6_output_0.out0_0_31_bfp.out1_36", "/transformer_blocks.7/ff_context/net.0/Mul_5_output_0.out17_2_15_bfp.out25_41", "/transformer_blocks.7/ff_context/net.2/Add_output_0.out17_3_75_bfp.out25_43", "/transformer_blocks.7/Add_7_output_0.out10_35", "/transformer_blocks.8/norm1/norm/LayerNormalization_output_0.out14_32_bfp.out15_32", "/transformer_blocks.8/norm1_context/norm/LayerNormalization_output_0.out14_33_bfp.out15_33", "/transformer_blocks.8/norm1/Add_2_output_0.out0_0_32_bfp.out1_37", "/transformer_blocks.8/norm1_context/Add_2_output_0.out0_0_33_bfp.out1_39", "/transformer_blocks.8/attn/Concat_output_0.out22_8_bfp.out23_24", "/transformer_blocks.8/attn/Concat_1_output_0.out22_8_bfp.out23_25", "/transformer_blocks.8/attn/Concat_2_output_0.out22_8_bfp.out23_26", "/transformer_blocks.8/attn/Reshape_3_output_0.out22_8_bfp.out27_0_8", "/transformer_blocks.8/attn/to_add_out/Add_output_0.out6_1_17_bfp.out7_17", "/transformer_blocks.8/Add_4_output_0.out10_38", "/transformer_blocks.8/attn/to_out.0/Add_output_0.out6_1_16_bfp.out7_16", "/transformer_blocks.8/Add_output_0.out10_36", "/transformer_blocks.8/norm2/LayerNormalization_output_0.out14_34_bfp.out15_34", "/transformer_blocks.8/Add_2_output_0.out0_0_34_bfp.out1_38", "/transformer_blocks.8/ff/net.0/Mul_5_output_0.out17_2_16_bfp.out25_45", "/transformer_blocks.8/ff/net.2/Add_output_0.out17_3_83_bfp.out25_47", "/transformer_blocks.8/Add_3_output_0.out10_37", "/transformer_blocks.8/norm2_context/LayerNormalization_output_0.out14_35_bfp.out15_35", "/transformer_blocks.8/Add_6_output_0.out0_0_35_bfp.out1_40", "/transformer_blocks.8/ff_context/net.0/Mul_5_output_0.out17_2_17_bfp.out25_46", "/transformer_blocks.8/ff_context/net.2/Add_output_0.out17_3_84_bfp.out25_48", "/transformer_blocks.8/Add_7_output_0.out10_39", "/transformer_blocks.9/norm1/norm/LayerNormalization_output_0.out14_36_bfp.out15_36", "/transformer_blocks.9/norm1_context/norm/LayerNormalization_output_0.out14_37_bfp.out15_37", "/transformer_blocks.9/norm1/Add_2_output_0.out0_0_36_bfp.out1_41", "/transformer_blocks.9/norm1_context/Add_2_output_0.out0_0_37_bfp.out1_43", "/transformer_blocks.9/attn/Concat_output_0.out22_9_bfp.out23_27", "/transformer_blocks.9/attn/Concat_1_output_0.out22_9_bfp.out23_28", "/transformer_blocks.9/attn/Concat_2_output_0.out22_9_bfp.out23_29", "/transformer_blocks.9/attn/Reshape_3_output_0.out22_9_bfp.out27_0_9", "/transformer_blocks.9/attn/to_add_out/Add_output_0.out6_1_19_bfp.out7_19", "/transformer_blocks.9/Add_4_output_0.out10_42", "/transformer_blocks.9/attn/to_out.0/Add_output_0.out6_1_18_bfp.out7_18", "/transformer_blocks.9/Add_output_0.out10_40", "/transformer_blocks.9/norm2/LayerNormalization_output_0.out14_38_bfp.out15_38", "/transformer_blocks.9/Add_2_output_0.out0_0_38_bfp.out1_42", "/transformer_blocks.9/ff/net.0/Mul_5_output_0.out17_2_18_bfp.out25_50", "/transformer_blocks.9/ff/net.2/Add_output_0.out17_3_92_bfp.out25_52", "/transformer_blocks.9/Add_3_output_0.out10_41", "/transformer_blocks.10/norm1/norm/LayerNormalization_output_0.out14_40_bfp.out15_40", "/transformer_blocks.10/norm1/Add_2_output_0.out0_0_40_bfp.out1_6", "/transformer_blocks.9/norm2_context/LayerNormalization_output_0.out14_39_bfp.out15_39", "/transformer_blocks.9/Add_6_output_0.out0_0_39_bfp.out1_44", "/transformer_blocks.9/ff_context/net.0/Mul_5_output_0.out17_2_19_bfp.out25_51", "/transformer_blocks.9/ff_context/net.2/Add_output_0.out17_3_93_bfp.out25_53", "/transformer_blocks.9/Add_7_output_0.out10_43", "/transformer_blocks.10/norm1_context/norm/LayerNormalization_output_0.out14_41_bfp.out15_41", "/transformer_blocks.10/norm1_context/Add_2_output_0.out0_0_41_bfp.out1_8", "/transformer_blocks.10/attn/Concat_output_0.out22_10_bfp.out23_30", "/transformer_blocks.10/attn/Concat_1_output_0.out22_10_bfp.out23_31", "/transformer_blocks.10/attn/Concat_2_output_0.out22_10_bfp.out23_32", "/transformer_blocks.10/attn/Reshape_3_output_0.out22_10_bfp.out27_0_10", "/transformer_blocks.10/attn/to_out.0/Add_output_0.out6_1_20_bfp.out7_20", "/transformer_blocks.10/Add_output_0.out10_6", "/transformer_blocks.10/norm2/LayerNormalization_output_0.out14_42_bfp.out15_42", "/transformer_blocks.10/Add_2_output_0.out0_0_42_bfp.out1_7", "/transformer_blocks.10/ff/net.0/Mul_5_output_0.out17_2_20_bfp.out25_55", "/transformer_blocks.10/ff/net.2/Add_output_0.out17_3_101_bfp.out25_57", "/transformer_blocks.10/Add_3_output_0.out10_7", "/transformer_blocks.10/attn/to_add_out/Add_output_0.out6_1_21_bfp.out7_21", "/transformer_blocks.10/Add_4_output_0.out10_8", "/transformer_blocks.10/norm2_context/LayerNormalization_output_0.out14_43_bfp.out15_43", "/transformer_blocks.10/Add_6_output_0.out0_0_43_bfp.out1_9", "/transformer_blocks.10/ff_context/net.0/Mul_5_output_0.out17_2_21_bfp.out25_56", "/transformer_blocks.10/ff_context/net.2/Add_output_0.out17_3_102_bfp.out25_58", "/transformer_blocks.10/Add_7_output_0.out10_9", "/transformer_blocks.11/norm1/norm/LayerNormalization_output_0.out14_44_bfp.out15_44", "/transformer_blocks.11/norm1/Add_2_output_0.out0_0_44_bfp.out1_10", "/transformer_blocks.11/norm1_context/norm/LayerNormalization_output_0.out14_45_bfp.out15_45", "/transformer_blocks.11/norm1_context/Add_2_output_0.out0_0_45_bfp.out1_12", "/transformer_blocks.11/attn/Concat_output_0.out22_11_bfp.out23_33", "/transformer_blocks.11/attn/Concat_1_output_0.out22_11_bfp.out23_34", "/transformer_blocks.11/attn/Concat_2_output_0.out22_11_bfp.out23_35", "/transformer_blocks.11/attn/Reshape_3_output_0.out22_11_bfp.out27_0_11", "/transformer_blocks.11/attn/to_out.0/Add_output_0.out6_1_22_bfp.out7_22", "/transformer_blocks.11/Add_output_0.out10_10", "/transformer_blocks.11/norm2/LayerNormalization_output_0.out14_46_bfp.out15_46", "/transformer_blocks.11/Add_2_output_0.out0_0_46_bfp.out1_11", "/transformer_blocks.11/ff/net.0/Mul_5_output_0.out17_2_22_bfp.out25_60", "/transformer_blocks.11/ff/net.2/Add_output_0.out17_3_110_bfp.out25_61", "/transformer_blocks.11/Add_3_output_0.out10_11" ] }, "const": { "buffer_size": 1177105408, "xrt_arg_id": 3, "packed_tensors": [ "pos_embed.proj.weight", "pos_embed_input.proj.weight", "time_text_embed.timestep_embedder.linear_1.weight_5_1_2", "/time_text_embed/timestep_embedder/act/Sigmoid.weights2_1", "time_text_embed.timestep_embedder.linear_2.weight_5_1_3", "time_text_embed.text_embedder.linear_1.weight_5_1_0", "/time_text_embed/text_embedder/act_1/Sigmoid.weights2_0", "time_text_embed.text_embedder.linear_2.weight_5_1_1", "/transformer_blocks.0/norm1/silu/Sigmoid.weights2_2", "encoder_hidden_states.out17_3_0_bfp.wts", "onnx::MatMul_3779", "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_bfp.wts", "/transformer_blocks.0/norm1_context/norm/Constant_output_0", "/transformer_blocks.0/norm1_context/norm/Constant_1_output_0", "transformer_blocks.0.norm1_context.linear.weight_5_1_4_27_0_1_transformer_blocks.0.norm1_context.linear.weight_5_1_4_27_0_0", "/transformer_blocks.0/norm1/norm/Constant_output_0", "/transformer_blocks.0/norm1/norm/Constant_1_output_0", "transformer_blocks.0.norm1.linear.weight_5_1_5_27_1_1_transformer_blocks.0.norm1.linear.weight_5_1_5_27_1_0", "onnx::MatMul_3783_onnx::MatMul_3780", "onnx::MatMul_3784_onnx::MatMul_3781", "onnx::MatMul_3785_onnx::MatMul_3782", "onnx::MatMul_3800", "transformer_blocks.0.norm1_context.linear.weight_5_1_4_27_0_2_gma", "/transformer_blocks.0/norm2_context/Constant_output_0", "/transformer_blocks.0/norm2_context/Constant_1_output_0", "transformer_blocks.0.norm1_context.linear.weight_5_1_4_27_0_3_transformer_blocks.0.norm1_context.linear.weight_5_1_4_27_0_4", "onnx::MatMul_3803", "onnx::MatMul_3804", "transformer_blocks.0.norm1_context.linear.weight_5_1_4_27_0_5_gma", "onnx::MatMul_3799", "transformer_blocks.0.norm1.linear.weight_5_1_5_27_1_2_gma", "/transformer_blocks.0/norm2/Constant_output_0", "/transformer_blocks.0/norm2/Constant_1_output_0", "transformer_blocks.0.norm1.linear.weight_5_1_5_27_1_3_transformer_blocks.0.norm1.linear.weight_5_1_5_27_1_4", "onnx::MatMul_3801", "onnx::MatMul_3802", "transformer_blocks.0.norm1.linear.weight_5_1_5_27_1_5_gma", "/transformer_blocks.1/norm1/norm/Constant_output_0", "/transformer_blocks.1/norm1/norm/Constant_1_output_0", "transformer_blocks.1.norm1.linear.weight_5_1_7_27_3_0_transformer_blocks.1.norm1.linear.weight_5_1_7_27_3_1", "onnx::MatMul_4074", "/transformer_blocks.1/norm1_context/norm/Constant_output_0", "/transformer_blocks.1/norm1_context/norm/Constant_1_output_0", "transformer_blocks.1.norm1_context.linear.weight_5_1_6_27_2_0_transformer_blocks.1.norm1_context.linear.weight_5_1_6_27_2_1", "onnx::MatMul_3808_onnx::MatMul_3805", "onnx::MatMul_3809_onnx::MatMul_3806", "onnx::MatMul_3810_onnx::MatMul_3807", "onnx::MatMul_3824", "transformer_blocks.1.norm1.linear.weight_5_1_7_27_3_2_gma", "/transformer_blocks.1/norm2/Constant_output_0", "/transformer_blocks.1/norm2/Constant_1_output_0", "transformer_blocks.1.norm1.linear.weight_5_1_7_27_3_3_transformer_blocks.1.norm1.linear.weight_5_1_7_27_3_4", "onnx::MatMul_3826", "onnx::MatMul_3827", "transformer_blocks.1.norm1.linear.weight_5_1_7_27_3_5_gma", "onnx::MatMul_4075", "onnx::MatMul_3825", "transformer_blocks.1.norm1_context.linear.weight_5_1_6_27_2_2_gma", "/transformer_blocks.1/norm2_context/Constant_output_0", "/transformer_blocks.1/norm2_context/Constant_1_output_0", "transformer_blocks.1.norm1_context.linear.weight_5_1_6_27_2_3_transformer_blocks.1.norm1_context.linear.weight_5_1_6_27_2_4", "onnx::MatMul_3828", "onnx::MatMul_3829", "transformer_blocks.1.norm1_context.linear.weight_5_1_6_27_2_5_gma", "/transformer_blocks.2/norm1/norm/Constant_output_0", "/transformer_blocks.2/norm1/norm/Constant_1_output_0", "/transformer_blocks.2/norm1_context/norm/Constant_output_0", "/transformer_blocks.2/norm1_context/norm/Constant_1_output_0", "transformer_blocks.2.norm1.linear.weight_5_1_9_27_5_0_transformer_blocks.2.norm1.linear.weight_5_1_9_27_5_1", "transformer_blocks.2.norm1_context.linear.weight_5_1_8_27_4_0_transformer_blocks.2.norm1_context.linear.weight_5_1_8_27_4_1", "onnx::MatMul_3833_onnx::MatMul_3830", "onnx::MatMul_3834_onnx::MatMul_3831", "onnx::MatMul_3835_onnx::MatMul_3832", "onnx::MatMul_3850", "transformer_blocks.2.norm1_context.linear.weight_5_1_8_27_4_2_gma", "onnx::MatMul_3849", "transformer_blocks.2.norm1.linear.weight_5_1_9_27_5_2_gma", "/transformer_blocks.2/norm2/Constant_output_0", "/transformer_blocks.2/norm2/Constant_1_output_0", "transformer_blocks.2.norm1.linear.weight_5_1_9_27_5_3_transformer_blocks.2.norm1.linear.weight_5_1_9_27_5_4", "onnx::MatMul_3851", "onnx::MatMul_3852", "transformer_blocks.2.norm1.linear.weight_5_1_9_27_5_5_gma", "onnx::MatMul_4076", "/transformer_blocks.2/norm2_context/Constant_output_0", "/transformer_blocks.2/norm2_context/Constant_1_output_0", "transformer_blocks.2.norm1_context.linear.weight_5_1_8_27_4_3_transformer_blocks.2.norm1_context.linear.weight_5_1_8_27_4_4", "onnx::MatMul_3853", "onnx::MatMul_3854", "transformer_blocks.2.norm1_context.linear.weight_5_1_8_27_4_5_gma", "/transformer_blocks.3/norm1/norm/Constant_output_0", "/transformer_blocks.3/norm1/norm/Constant_1_output_0", "/transformer_blocks.3/norm1_context/norm/Constant_output_0", "/transformer_blocks.3/norm1_context/norm/Constant_1_output_0", "transformer_blocks.3.norm1.linear.weight_5_1_11_27_7_0_transformer_blocks.3.norm1.linear.weight_5_1_11_27_7_1", "transformer_blocks.3.norm1_context.linear.weight_5_1_10_27_6_0_transformer_blocks.3.norm1_context.linear.weight_5_1_10_27_6_1", "onnx::MatMul_3858_onnx::MatMul_3855", "onnx::MatMul_3859_onnx::MatMul_3856", "onnx::MatMul_3860_onnx::MatMul_3857", "onnx::MatMul_3875", "transformer_blocks.3.norm1_context.linear.weight_5_1_10_27_6_2_gma", "onnx::MatMul_3874", "transformer_blocks.3.norm1.linear.weight_5_1_11_27_7_2_gma", "/transformer_blocks.3/norm2/Constant_output_0", "/transformer_blocks.3/norm2/Constant_1_output_0", "transformer_blocks.3.norm1.linear.weight_5_1_11_27_7_3_transformer_blocks.3.norm1.linear.weight_5_1_11_27_7_4", "onnx::MatMul_3876", "onnx::MatMul_3877", "transformer_blocks.3.norm1.linear.weight_5_1_11_27_7_5_gma", "onnx::MatMul_4077", "/transformer_blocks.3/norm2_context/Constant_output_0", "/transformer_blocks.3/norm2_context/Constant_1_output_0", "transformer_blocks.3.norm1_context.linear.weight_5_1_10_27_6_3_transformer_blocks.3.norm1_context.linear.weight_5_1_10_27_6_4", "onnx::MatMul_3878", "onnx::MatMul_3879", "transformer_blocks.3.norm1_context.linear.weight_5_1_10_27_6_5_gma", "/transformer_blocks.4/norm1/norm/Constant_output_0", "/transformer_blocks.4/norm1/norm/Constant_1_output_0", "/transformer_blocks.4/norm1_context/norm/Constant_output_0", "/transformer_blocks.4/norm1_context/norm/Constant_1_output_0", "transformer_blocks.4.norm1.linear.weight_5_1_13_27_9_0_transformer_blocks.4.norm1.linear.weight_5_1_13_27_9_1", "transformer_blocks.4.norm1_context.linear.weight_5_1_12_27_8_0_transformer_blocks.4.norm1_context.linear.weight_5_1_12_27_8_1", "onnx::MatMul_3883_onnx::MatMul_3880", "onnx::MatMul_3884_onnx::MatMul_3881", "onnx::MatMul_3885_onnx::MatMul_3882", "onnx::MatMul_3900", "transformer_blocks.4.norm1_context.linear.weight_5_1_12_27_8_2_gma", "onnx::MatMul_3899", "transformer_blocks.4.norm1.linear.weight_5_1_13_27_9_2_gma", "/transformer_blocks.4/norm2/Constant_output_0", "/transformer_blocks.4/norm2/Constant_1_output_0", "transformer_blocks.4.norm1.linear.weight_5_1_13_27_9_3_transformer_blocks.4.norm1.linear.weight_5_1_13_27_9_4", "onnx::MatMul_3901", "onnx::MatMul_3902", "transformer_blocks.4.norm1.linear.weight_5_1_13_27_9_5_gma", "onnx::MatMul_4078", "/transformer_blocks.4/norm2_context/Constant_output_0", "/transformer_blocks.4/norm2_context/Constant_1_output_0", "transformer_blocks.4.norm1_context.linear.weight_5_1_12_27_8_3_transformer_blocks.4.norm1_context.linear.weight_5_1_12_27_8_4", "onnx::MatMul_3903", "onnx::MatMul_3904", "transformer_blocks.4.norm1_context.linear.weight_5_1_12_27_8_5_gma", "/transformer_blocks.5/norm1/norm/Constant_output_0", "/transformer_blocks.5/norm1/norm/Constant_1_output_0", "/transformer_blocks.5/norm1_context/norm/Constant_output_0", "/transformer_blocks.5/norm1_context/norm/Constant_1_output_0", "transformer_blocks.5.norm1.linear.weight_5_1_15_27_11_0_transformer_blocks.5.norm1.linear.weight_5_1_15_27_11_1", "transformer_blocks.5.norm1_context.linear.weight_5_1_14_27_10_0_transformer_blocks.5.norm1_context.linear.weight_5_1_14_27_10_1", "onnx::MatMul_3908_onnx::MatMul_3905", "onnx::MatMul_3909_onnx::MatMul_3906", "onnx::MatMul_3910_onnx::MatMul_3907", "onnx::MatMul_3925", "transformer_blocks.5.norm1_context.linear.weight_5_1_14_27_10_2_gma", "onnx::MatMul_3924", "transformer_blocks.5.norm1.linear.weight_5_1_15_27_11_2_gma", "/transformer_blocks.5/norm2/Constant_output_0", "/transformer_blocks.5/norm2/Constant_1_output_0", "transformer_blocks.5.norm1.linear.weight_5_1_15_27_11_3_transformer_blocks.5.norm1.linear.weight_5_1_15_27_11_4", "onnx::MatMul_3926", "onnx::MatMul_3927", "transformer_blocks.5.norm1.linear.weight_5_1_15_27_11_5_gma", "onnx::MatMul_4079", "/transformer_blocks.5/norm2_context/Constant_output_0", "/transformer_blocks.5/norm2_context/Constant_1_output_0", "transformer_blocks.5.norm1_context.linear.weight_5_1_14_27_10_3_transformer_blocks.5.norm1_context.linear.weight_5_1_14_27_10_4", "onnx::MatMul_3928", "onnx::MatMul_3929", "transformer_blocks.5.norm1_context.linear.weight_5_1_14_27_10_5_gma", "/transformer_blocks.6/norm1/norm/Constant_output_0", "/transformer_blocks.6/norm1/norm/Constant_1_output_0", "/transformer_blocks.6/norm1_context/norm/Constant_output_0", "/transformer_blocks.6/norm1_context/norm/Constant_1_output_0", "transformer_blocks.6.norm1.linear.weight_5_1_17_27_13_0_transformer_blocks.6.norm1.linear.weight_5_1_17_27_13_1", "transformer_blocks.6.norm1_context.linear.weight_5_1_16_27_12_0_transformer_blocks.6.norm1_context.linear.weight_5_1_16_27_12_1", "onnx::MatMul_3933_onnx::MatMul_3930", "onnx::MatMul_3934_onnx::MatMul_3931", "onnx::MatMul_3935_onnx::MatMul_3932", "onnx::MatMul_3950", "transformer_blocks.6.norm1_context.linear.weight_5_1_16_27_12_2_gma", "onnx::MatMul_3949", "transformer_blocks.6.norm1.linear.weight_5_1_17_27_13_2_gma", "/transformer_blocks.6/norm2/Constant_output_0", "/transformer_blocks.6/norm2/Constant_1_output_0", "transformer_blocks.6.norm1.linear.weight_5_1_17_27_13_3_transformer_blocks.6.norm1.linear.weight_5_1_17_27_13_4", "onnx::MatMul_3951", "onnx::MatMul_3952", "transformer_blocks.6.norm1.linear.weight_5_1_17_27_13_5_gma", "onnx::MatMul_4080", "/transformer_blocks.6/norm2_context/Constant_output_0", "/transformer_blocks.6/norm2_context/Constant_1_output_0", "transformer_blocks.6.norm1_context.linear.weight_5_1_16_27_12_3_transformer_blocks.6.norm1_context.linear.weight_5_1_16_27_12_4", "onnx::MatMul_3953", "onnx::MatMul_3954", "transformer_blocks.6.norm1_context.linear.weight_5_1_16_27_12_5_gma", "/transformer_blocks.7/norm1/norm/Constant_output_0", "/transformer_blocks.7/norm1/norm/Constant_1_output_0", "/transformer_blocks.7/norm1_context/norm/Constant_output_0", "/transformer_blocks.7/norm1_context/norm/Constant_1_output_0", "transformer_blocks.7.norm1.linear.weight_5_1_19_27_15_0_transformer_blocks.7.norm1.linear.weight_5_1_19_27_15_1", "transformer_blocks.7.norm1_context.linear.weight_5_1_18_27_14_0_transformer_blocks.7.norm1_context.linear.weight_5_1_18_27_14_1", "onnx::MatMul_3958_onnx::MatMul_3955", "onnx::MatMul_3959_onnx::MatMul_3956", "onnx::MatMul_3960_onnx::MatMul_3957", "onnx::MatMul_3975", "transformer_blocks.7.norm1_context.linear.weight_5_1_18_27_14_2_gma", "onnx::MatMul_3974", "transformer_blocks.7.norm1.linear.weight_5_1_19_27_15_2_gma", "/transformer_blocks.7/norm2/Constant_output_0", "/transformer_blocks.7/norm2/Constant_1_output_0", "transformer_blocks.7.norm1.linear.weight_5_1_19_27_15_3_transformer_blocks.7.norm1.linear.weight_5_1_19_27_15_4", "onnx::MatMul_3976", "onnx::MatMul_3977", "transformer_blocks.7.norm1.linear.weight_5_1_19_27_15_5_gma", "onnx::MatMul_4081", "/transformer_blocks.7/norm2_context/Constant_output_0", "/transformer_blocks.7/norm2_context/Constant_1_output_0", "transformer_blocks.7.norm1_context.linear.weight_5_1_18_27_14_3_transformer_blocks.7.norm1_context.linear.weight_5_1_18_27_14_4", "onnx::MatMul_3978", "onnx::MatMul_3979", "transformer_blocks.7.norm1_context.linear.weight_5_1_18_27_14_5_gma", "/transformer_blocks.8/norm1/norm/Constant_output_0", "/transformer_blocks.8/norm1/norm/Constant_1_output_0", "/transformer_blocks.8/norm1_context/norm/Constant_output_0", "/transformer_blocks.8/norm1_context/norm/Constant_1_output_0", "transformer_blocks.8.norm1.linear.weight_5_1_21_27_17_0_transformer_blocks.8.norm1.linear.weight_5_1_21_27_17_1", "transformer_blocks.8.norm1_context.linear.weight_5_1_20_27_16_0_transformer_blocks.8.norm1_context.linear.weight_5_1_20_27_16_1", "onnx::MatMul_3983_onnx::MatMul_3980", "onnx::MatMul_3984_onnx::MatMul_3981", "onnx::MatMul_3985_onnx::MatMul_3982", "onnx::MatMul_4000", "transformer_blocks.8.norm1_context.linear.weight_5_1_20_27_16_2_gma", "onnx::MatMul_3999", "transformer_blocks.8.norm1.linear.weight_5_1_21_27_17_2_gma", "/transformer_blocks.8/norm2/Constant_output_0", "/transformer_blocks.8/norm2/Constant_1_output_0", "transformer_blocks.8.norm1.linear.weight_5_1_21_27_17_3_transformer_blocks.8.norm1.linear.weight_5_1_21_27_17_4", "onnx::MatMul_4001", "onnx::MatMul_4002", "transformer_blocks.8.norm1.linear.weight_5_1_21_27_17_5_gma", "onnx::MatMul_4082", "/transformer_blocks.8/norm2_context/Constant_output_0", "/transformer_blocks.8/norm2_context/Constant_1_output_0", "transformer_blocks.8.norm1_context.linear.weight_5_1_20_27_16_3_transformer_blocks.8.norm1_context.linear.weight_5_1_20_27_16_4", "onnx::MatMul_4003", "onnx::MatMul_4004", "transformer_blocks.8.norm1_context.linear.weight_5_1_20_27_16_5_gma", "/transformer_blocks.9/norm1/norm/Constant_output_0", "/transformer_blocks.9/norm1/norm/Constant_1_output_0", "/transformer_blocks.9/norm1_context/norm/Constant_output_0", "/transformer_blocks.9/norm1_context/norm/Constant_1_output_0", "transformer_blocks.9.norm1.linear.weight_5_1_23_27_19_0_transformer_blocks.9.norm1.linear.weight_5_1_23_27_19_1", "transformer_blocks.9.norm1_context.linear.weight_5_1_22_27_18_0_transformer_blocks.9.norm1_context.linear.weight_5_1_22_27_18_1", "onnx::MatMul_4008_onnx::MatMul_4005", "onnx::MatMul_4009_onnx::MatMul_4006", "onnx::MatMul_4010_onnx::MatMul_4007", "onnx::MatMul_4025", "transformer_blocks.9.norm1_context.linear.weight_5_1_22_27_18_2_gma", "onnx::MatMul_4024", "transformer_blocks.9.norm1.linear.weight_5_1_23_27_19_2_gma", "/transformer_blocks.9/norm2/Constant_output_0", "/transformer_blocks.9/norm2/Constant_1_output_0", "transformer_blocks.9.norm1.linear.weight_5_1_23_27_19_3_transformer_blocks.9.norm1.linear.weight_5_1_23_27_19_4", "onnx::MatMul_4026", "onnx::MatMul_4027", "transformer_blocks.9.norm1.linear.weight_5_1_23_27_19_5_gma", "/transformer_blocks.10/norm1/norm/Constant_output_0", "/transformer_blocks.10/norm1/norm/Constant_1_output_0", "transformer_blocks.10.norm1.linear.weight_5_1_25_27_21_0_transformer_blocks.10.norm1.linear.weight_5_1_25_27_21_1", "onnx::MatMul_4083", "/transformer_blocks.9/norm2_context/Constant_output_0", "/transformer_blocks.9/norm2_context/Constant_1_output_0", "transformer_blocks.9.norm1_context.linear.weight_5_1_22_27_18_3_transformer_blocks.9.norm1_context.linear.weight_5_1_22_27_18_4", "onnx::MatMul_4028", "onnx::MatMul_4029", "transformer_blocks.9.norm1_context.linear.weight_5_1_22_27_18_5_gma", "/transformer_blocks.10/norm1_context/norm/Constant_output_0", "/transformer_blocks.10/norm1_context/norm/Constant_1_output_0", "transformer_blocks.10.norm1_context.linear.weight_5_1_24_27_20_0_transformer_blocks.10.norm1_context.linear.weight_5_1_24_27_20_1", "onnx::MatMul_4033_onnx::MatMul_4030", "onnx::MatMul_4034_onnx::MatMul_4031", "onnx::MatMul_4035_onnx::MatMul_4032", "onnx::MatMul_4049", "transformer_blocks.10.norm1.linear.weight_5_1_25_27_21_2_gma", "/transformer_blocks.10/norm2/Constant_output_0", "/transformer_blocks.10/norm2/Constant_1_output_0", "transformer_blocks.10.norm1.linear.weight_5_1_25_27_21_3_transformer_blocks.10.norm1.linear.weight_5_1_25_27_21_4", "onnx::MatMul_4051", "onnx::MatMul_4052", "transformer_blocks.10.norm1.linear.weight_5_1_25_27_21_5_gma", "onnx::MatMul_4084", "onnx::MatMul_4050", "transformer_blocks.10.norm1_context.linear.weight_5_1_24_27_20_2_gma", "/transformer_blocks.10/norm2_context/Constant_output_0", "/transformer_blocks.10/norm2_context/Constant_1_output_0", "transformer_blocks.10.norm1_context.linear.weight_5_1_24_27_20_3_transformer_blocks.10.norm1_context.linear.weight_5_1_24_27_20_4", "onnx::MatMul_4053", "onnx::MatMul_4054", "transformer_blocks.10.norm1_context.linear.weight_5_1_24_27_20_5_gma", "/transformer_blocks.11/norm1/norm/Constant_output_0", "/transformer_blocks.11/norm1/norm/Constant_1_output_0", "transformer_blocks.11.norm1.linear.weight_5_1_27_27_23_0_transformer_blocks.11.norm1.linear.weight_5_1_27_27_23_1", "/transformer_blocks.11/norm1_context/norm/Constant_output_0", "/transformer_blocks.11/norm1_context/norm/Constant_1_output_0", "transformer_blocks.11.norm1_context.linear.weight_5_1_26_27_22_0_transformer_blocks.11.norm1_context.linear.weight_5_1_26_27_22_1", "onnx::MatMul_4058_onnx::MatMul_4055", "onnx::MatMul_4059_onnx::MatMul_4056", "onnx::MatMul_4060_onnx::MatMul_4057", "onnx::MatMul_4071", "transformer_blocks.11.norm1.linear.weight_5_1_27_27_23_2_gma", "/transformer_blocks.11/norm2/Constant_output_0", "/transformer_blocks.11/norm2/Constant_1_output_0", "transformer_blocks.11.norm1.linear.weight_5_1_27_27_23_3_transformer_blocks.11.norm1.linear.weight_5_1_27_27_23_4", "onnx::MatMul_4072", "onnx::MatMul_4073", "transformer_blocks.11.norm1.linear.weight_5_1_27_27_23_5_gma", "onnx::MatMul_4085" ] }, "super_instr": { "buffer_size": 0, "xrt_arg_id": 4, "packed_tensors": [] } }, "tensor_map": { "hidden_states_nhwc.out5_0_0": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1, 16 ], "size_in_bytes": 32, "op_tensor_size": 32, "dynamic_shapes": [ "batch_size", "w", "h", "False" ], "offset": 0 }, "/pos_embed/Reshape_1_output_0.out_35_1_2": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "False", "floor(h/2)*floor(w/2)", "False" ], "offset": 32 }, "controlnet_cond_nhwc.out5_0_1": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1, 16 ], "size_in_bytes": 32, "op_tensor_size": 32, "dynamic_shapes": [ "batch_size", "w", "h", "False" ], "offset": 3104 }, "/time_text_embed/Cast_output_0.out17_3_3": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 256 ], "size_in_bytes": 512, "op_tensor_size": 512, "dynamic_shapes": [ "batch_size", "False" ], "offset": 3136 }, "pooled_projections.out17_3_1": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 2048 ], "size_in_bytes": 4096, "op_tensor_size": 4096, "dynamic_shapes": [ "batch_size", "False" ], "offset": 3648 }, "encoder_hidden_states.out17_3_0": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 4096 ], "size_in_bytes": 8192, "op_tensor_size": 8192, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 7744 }, "/controlnet_blocks.0/Add_output_0.out17_3_13": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 0 }, "/controlnet_blocks.1/Add_output_0.out17_3_22": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 3072 }, "/controlnet_blocks.2/Add_output_0.out17_3_31": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 6144 }, "/controlnet_blocks.3/Add_output_0.out17_3_40": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 9216 }, "/controlnet_blocks.4/Add_output_0.out17_3_49": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 12288 }, "/controlnet_blocks.5/Add_output_0.out17_3_58": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 15360 }, "/controlnet_blocks.6/Add_output_0.out17_3_67": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 18432 }, "/controlnet_blocks.7/Add_output_0.out17_3_76": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 21504 }, "/controlnet_blocks.8/Add_output_0.out17_3_85": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 24576 }, "/controlnet_blocks.9/Add_output_0.out17_3_94": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 27648 }, "/controlnet_blocks.10/Add_output_0.out17_3_103": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 30720 }, "/controlnet_blocks.11/Add_output_0.out17_3_111": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 33792 }, "/pos_embed/Transpose_output_0.out5_0_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 0 }, "/pos_embed/Add_2_output_0.out_35_1_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 3072 }, "/pos_embed_input/Transpose_output_0.out5_0_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 6144 }, "/Add_output_0.out_35_1_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 9216 }, "/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 12288 }, "/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 15360 }, "/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 18432 }, "/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 21504 }, "/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 24576 }, "/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 27648 }, "/time_text_embed/Add_output_0.out_35_1_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 30720 }, "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 33792 }, "encoder_hidden_states.out17_3_0_bfp.out25_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 4096 ], "size_in_bytes": 4096, "op_tensor_size": 4096, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 36864 }, "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 40960 }, "/context_embedder/Add_output_0.out17_3_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 42496 }, "/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 45568 }, "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 47104 }, "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 48640 }, "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 50176 }, "/transformer_blocks.0/attn/Concat_output_0.out22_0_bfp.out23_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 51712 }, "/transformer_blocks.0/attn/Concat_1_output_0.out22_0_bfp.out23_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 53248 }, "/transformer_blocks.0/attn/Concat_2_output_0.out22_0_bfp.out23_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 54784 }, "/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 56320 }, "/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 57856 }, "/transformer_blocks.0/Add_4_output_0.out10_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 59392 }, "/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 62464 }, "/transformer_blocks.0/Add_6_output_0.out0_0_3_bfp.out1_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 64000 }, "/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 65536 }, "/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_12_bfp.out25_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 71680 }, "/transformer_blocks.0/Add_7_output_0.out10_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 73216 }, "/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 76288 }, "/transformer_blocks.0/Add_output_0.out10_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 77824 }, "/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 80896 }, "/transformer_blocks.0/Add_2_output_0.out0_0_2_bfp.out1_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 82432 }, "/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 83968 }, "/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_11_bfp.out25_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 90112 }, "/transformer_blocks.0/Add_3_output_0.out10_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 91648 }, "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_4_bfp.out15_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 94720 }, "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 96256 }, "/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_5_bfp.out15_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 97792 }, "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 99328 }, "/transformer_blocks.1/attn/Concat_output_0.out22_1_bfp.out23_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 100864 }, "/transformer_blocks.1/attn/Concat_1_output_0.out22_1_bfp.out23_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 102400 }, "/transformer_blocks.1/attn/Concat_2_output_0.out22_1_bfp.out23_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 103936 }, "/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 105472 }, "/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 107008 }, "/transformer_blocks.1/Add_output_0.out10_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 108544 }, "/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 111616 }, "/transformer_blocks.1/Add_2_output_0.out0_0_6_bfp.out1_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 113152 }, "/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 114688 }, "/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_20_bfp.out25_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 120832 }, "/transformer_blocks.1/Add_3_output_0.out10_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 122368 }, "/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 125440 }, "/transformer_blocks.1/Add_4_output_0.out10_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 126976 }, "/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 130048 }, "/transformer_blocks.1/Add_6_output_0.out0_0_7_bfp.out1_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 131584 }, "/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 133120 }, "/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_21_bfp.out25_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 139264 }, "/transformer_blocks.1/Add_7_output_0.out10_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 140800 }, "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_8_bfp.out15_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 143872 }, "/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_9_bfp.out15_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 145408 }, "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 146944 }, "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 148480 }, "/transformer_blocks.2/attn/Concat_output_0.out22_2_bfp.out23_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 150016 }, "/transformer_blocks.2/attn/Concat_1_output_0.out22_2_bfp.out23_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 151552 }, "/transformer_blocks.2/attn/Concat_2_output_0.out22_2_bfp.out23_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 153088 }, "/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 154624 }, "/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 156160 }, "/transformer_blocks.2/Add_4_output_0.out10_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 157696 }, "/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 160768 }, "/transformer_blocks.2/Add_output_0.out10_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 162304 }, "/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 165376 }, "/transformer_blocks.2/Add_2_output_0.out0_0_10_bfp.out1_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 166912 }, "/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 168448 }, "/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_29_bfp.out25_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 174592 }, "/transformer_blocks.2/Add_3_output_0.out10_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 176128 }, "/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 179200 }, "/transformer_blocks.2/Add_6_output_0.out0_0_11_bfp.out1_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 180736 }, "/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 182272 }, "/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_30_bfp.out25_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 188416 }, "/transformer_blocks.2/Add_7_output_0.out10_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 189952 }, "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_12_bfp.out15_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 193024 }, "/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_13_bfp.out15_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 194560 }, "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 196096 }, "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 197632 }, "/transformer_blocks.3/attn/Concat_output_0.out22_3_bfp.out23_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 199168 }, "/transformer_blocks.3/attn/Concat_1_output_0.out22_3_bfp.out23_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 200704 }, "/transformer_blocks.3/attn/Concat_2_output_0.out22_3_bfp.out23_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 202240 }, "/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 203776 }, "/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 205312 }, "/transformer_blocks.3/Add_4_output_0.out10_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 206848 }, "/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 209920 }, "/transformer_blocks.3/Add_output_0.out10_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 211456 }, "/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 214528 }, "/transformer_blocks.3/Add_2_output_0.out0_0_14_bfp.out1_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 216064 }, "/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 217600 }, "/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_38_bfp.out25_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 223744 }, "/transformer_blocks.3/Add_3_output_0.out10_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 225280 }, "/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 228352 }, "/transformer_blocks.3/Add_6_output_0.out0_0_15_bfp.out1_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 229888 }, "/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 231424 }, "/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_39_bfp.out25_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 237568 }, "/transformer_blocks.3/Add_7_output_0.out10_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 239104 }, "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_16_bfp.out15_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 242176 }, "/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_17_bfp.out15_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 243712 }, "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 245248 }, "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 246784 }, "/transformer_blocks.4/attn/Concat_output_0.out22_4_bfp.out23_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 248320 }, "/transformer_blocks.4/attn/Concat_1_output_0.out22_4_bfp.out23_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 249856 }, "/transformer_blocks.4/attn/Concat_2_output_0.out22_4_bfp.out23_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 251392 }, "/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 252928 }, "/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 254464 }, "/transformer_blocks.4/Add_4_output_0.out10_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 256000 }, "/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 259072 }, "/transformer_blocks.4/Add_output_0.out10_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 260608 }, "/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 263680 }, "/transformer_blocks.4/Add_2_output_0.out0_0_18_bfp.out1_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 265216 }, "/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 266752 }, "/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_47_bfp.out25_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 272896 }, "/transformer_blocks.4/Add_3_output_0.out10_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 274432 }, "/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 277504 }, "/transformer_blocks.4/Add_6_output_0.out0_0_19_bfp.out1_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 279040 }, "/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 280576 }, "/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_48_bfp.out25_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 286720 }, "/transformer_blocks.4/Add_7_output_0.out10_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 288256 }, "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_20_bfp.out15_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 291328 }, "/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_21_bfp.out15_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 292864 }, "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 294400 }, "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 295936 }, "/transformer_blocks.5/attn/Concat_output_0.out22_5_bfp.out23_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 297472 }, "/transformer_blocks.5/attn/Concat_1_output_0.out22_5_bfp.out23_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 299008 }, "/transformer_blocks.5/attn/Concat_2_output_0.out22_5_bfp.out23_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 300544 }, "/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 302080 }, "/transformer_blocks.5/attn/to_add_out/Add_output_0.out6_1_11_bfp.out7_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 303616 }, "/transformer_blocks.5/Add_4_output_0.out10_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 305152 }, "/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 308224 }, "/transformer_blocks.5/Add_output_0.out10_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 309760 }, "/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 312832 }, "/transformer_blocks.5/Add_2_output_0.out0_0_22_bfp.out1_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 314368 }, "/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 315904 }, "/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_56_bfp.out25_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 322048 }, "/transformer_blocks.5/Add_3_output_0.out10_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 323584 }, "/transformer_blocks.5/norm2_context/LayerNormalization_output_0.out14_23_bfp.out15_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 326656 }, "/transformer_blocks.5/Add_6_output_0.out0_0_23_bfp.out1_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 328192 }, "/transformer_blocks.5/ff_context/net.0/Mul_5_output_0.out17_2_11_bfp.out25_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 329728 }, "/transformer_blocks.5/ff_context/net.2/Add_output_0.out17_3_57_bfp.out25_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 335872 }, "/transformer_blocks.5/Add_7_output_0.out10_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 337408 }, "/transformer_blocks.6/norm1/norm/LayerNormalization_output_0.out14_24_bfp.out15_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 340480 }, "/transformer_blocks.6/norm1_context/norm/LayerNormalization_output_0.out14_25_bfp.out15_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 342016 }, "/transformer_blocks.6/norm1/Add_2_output_0.out0_0_24_bfp.out1_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 343552 }, "/transformer_blocks.6/norm1_context/Add_2_output_0.out0_0_25_bfp.out1_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 345088 }, "/transformer_blocks.6/attn/Concat_output_0.out22_6_bfp.out23_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 346624 }, "/transformer_blocks.6/attn/Concat_1_output_0.out22_6_bfp.out23_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 348160 }, "/transformer_blocks.6/attn/Concat_2_output_0.out22_6_bfp.out23_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 349696 }, "/transformer_blocks.6/attn/Reshape_3_output_0.out22_6_bfp.out27_0_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 351232 }, "/transformer_blocks.6/attn/to_add_out/Add_output_0.out6_1_13_bfp.out7_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 352768 }, "/transformer_blocks.6/Add_4_output_0.out10_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 354304 }, "/transformer_blocks.6/attn/to_out.0/Add_output_0.out6_1_12_bfp.out7_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 357376 }, "/transformer_blocks.6/Add_output_0.out10_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 358912 }, "/transformer_blocks.6/norm2/LayerNormalization_output_0.out14_26_bfp.out15_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 361984 }, "/transformer_blocks.6/Add_2_output_0.out0_0_26_bfp.out1_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 363520 }, "/transformer_blocks.6/ff/net.0/Mul_5_output_0.out17_2_12_bfp.out25_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 365056 }, "/transformer_blocks.6/ff/net.2/Add_output_0.out17_3_65_bfp.out25_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 371200 }, "/transformer_blocks.6/Add_3_output_0.out10_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 372736 }, "/transformer_blocks.6/norm2_context/LayerNormalization_output_0.out14_27_bfp.out15_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 375808 }, "/transformer_blocks.6/Add_6_output_0.out0_0_27_bfp.out1_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 377344 }, "/transformer_blocks.6/ff_context/net.0/Mul_5_output_0.out17_2_13_bfp.out25_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 378880 }, "/transformer_blocks.6/ff_context/net.2/Add_output_0.out17_3_66_bfp.out25_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 385024 }, "/transformer_blocks.6/Add_7_output_0.out10_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 386560 }, "/transformer_blocks.7/norm1/norm/LayerNormalization_output_0.out14_28_bfp.out15_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 389632 }, "/transformer_blocks.7/norm1_context/norm/LayerNormalization_output_0.out14_29_bfp.out15_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 391168 }, "/transformer_blocks.7/norm1/Add_2_output_0.out0_0_28_bfp.out1_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 392704 }, "/transformer_blocks.7/norm1_context/Add_2_output_0.out0_0_29_bfp.out1_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 394240 }, "/transformer_blocks.7/attn/Concat_output_0.out22_7_bfp.out23_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 395776 }, "/transformer_blocks.7/attn/Concat_1_output_0.out22_7_bfp.out23_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 397312 }, "/transformer_blocks.7/attn/Concat_2_output_0.out22_7_bfp.out23_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 398848 }, "/transformer_blocks.7/attn/Reshape_3_output_0.out22_7_bfp.out27_0_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 400384 }, "/transformer_blocks.7/attn/to_add_out/Add_output_0.out6_1_15_bfp.out7_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 401920 }, "/transformer_blocks.7/Add_4_output_0.out10_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 403456 }, "/transformer_blocks.7/attn/to_out.0/Add_output_0.out6_1_14_bfp.out7_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 406528 }, "/transformer_blocks.7/Add_output_0.out10_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 408064 }, "/transformer_blocks.7/norm2/LayerNormalization_output_0.out14_30_bfp.out15_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 411136 }, "/transformer_blocks.7/Add_2_output_0.out0_0_30_bfp.out1_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 412672 }, "/transformer_blocks.7/ff/net.0/Mul_5_output_0.out17_2_14_bfp.out25_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 414208 }, "/transformer_blocks.7/ff/net.2/Add_output_0.out17_3_74_bfp.out25_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 420352 }, "/transformer_blocks.7/Add_3_output_0.out10_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 421888 }, "/transformer_blocks.7/norm2_context/LayerNormalization_output_0.out14_31_bfp.out15_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 424960 }, "/transformer_blocks.7/Add_6_output_0.out0_0_31_bfp.out1_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 426496 }, "/transformer_blocks.7/ff_context/net.0/Mul_5_output_0.out17_2_15_bfp.out25_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 428032 }, "/transformer_blocks.7/ff_context/net.2/Add_output_0.out17_3_75_bfp.out25_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 434176 }, "/transformer_blocks.7/Add_7_output_0.out10_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 435712 }, "/transformer_blocks.8/norm1/norm/LayerNormalization_output_0.out14_32_bfp.out15_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 438784 }, "/transformer_blocks.8/norm1_context/norm/LayerNormalization_output_0.out14_33_bfp.out15_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 440320 }, "/transformer_blocks.8/norm1/Add_2_output_0.out0_0_32_bfp.out1_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 441856 }, "/transformer_blocks.8/norm1_context/Add_2_output_0.out0_0_33_bfp.out1_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 443392 }, "/transformer_blocks.8/attn/Concat_output_0.out22_8_bfp.out23_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 444928 }, "/transformer_blocks.8/attn/Concat_1_output_0.out22_8_bfp.out23_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 446464 }, "/transformer_blocks.8/attn/Concat_2_output_0.out22_8_bfp.out23_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 448000 }, "/transformer_blocks.8/attn/Reshape_3_output_0.out22_8_bfp.out27_0_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 449536 }, "/transformer_blocks.8/attn/to_add_out/Add_output_0.out6_1_17_bfp.out7_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 451072 }, "/transformer_blocks.8/Add_4_output_0.out10_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 452608 }, "/transformer_blocks.8/attn/to_out.0/Add_output_0.out6_1_16_bfp.out7_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 455680 }, "/transformer_blocks.8/Add_output_0.out10_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 457216 }, "/transformer_blocks.8/norm2/LayerNormalization_output_0.out14_34_bfp.out15_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 460288 }, "/transformer_blocks.8/Add_2_output_0.out0_0_34_bfp.out1_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 461824 }, "/transformer_blocks.8/ff/net.0/Mul_5_output_0.out17_2_16_bfp.out25_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 463360 }, "/transformer_blocks.8/ff/net.2/Add_output_0.out17_3_83_bfp.out25_47": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 469504 }, "/transformer_blocks.8/Add_3_output_0.out10_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 471040 }, "/transformer_blocks.8/norm2_context/LayerNormalization_output_0.out14_35_bfp.out15_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 474112 }, "/transformer_blocks.8/Add_6_output_0.out0_0_35_bfp.out1_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 475648 }, "/transformer_blocks.8/ff_context/net.0/Mul_5_output_0.out17_2_17_bfp.out25_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 477184 }, "/transformer_blocks.8/ff_context/net.2/Add_output_0.out17_3_84_bfp.out25_48": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 483328 }, "/transformer_blocks.8/Add_7_output_0.out10_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 484864 }, "/transformer_blocks.9/norm1/norm/LayerNormalization_output_0.out14_36_bfp.out15_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 487936 }, "/transformer_blocks.9/norm1_context/norm/LayerNormalization_output_0.out14_37_bfp.out15_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 489472 }, "/transformer_blocks.9/norm1/Add_2_output_0.out0_0_36_bfp.out1_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 491008 }, "/transformer_blocks.9/norm1_context/Add_2_output_0.out0_0_37_bfp.out1_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 492544 }, "/transformer_blocks.9/attn/Concat_output_0.out22_9_bfp.out23_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 494080 }, "/transformer_blocks.9/attn/Concat_1_output_0.out22_9_bfp.out23_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 495616 }, "/transformer_blocks.9/attn/Concat_2_output_0.out22_9_bfp.out23_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 497152 }, "/transformer_blocks.9/attn/Reshape_3_output_0.out22_9_bfp.out27_0_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 498688 }, "/transformer_blocks.9/attn/to_add_out/Add_output_0.out6_1_19_bfp.out7_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 500224 }, "/transformer_blocks.9/Add_4_output_0.out10_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 501760 }, "/transformer_blocks.9/attn/to_out.0/Add_output_0.out6_1_18_bfp.out7_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 504832 }, "/transformer_blocks.9/Add_output_0.out10_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 506368 }, "/transformer_blocks.9/norm2/LayerNormalization_output_0.out14_38_bfp.out15_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 509440 }, "/transformer_blocks.9/Add_2_output_0.out0_0_38_bfp.out1_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 510976 }, "/transformer_blocks.9/ff/net.0/Mul_5_output_0.out17_2_18_bfp.out25_50": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 512512 }, "/transformer_blocks.9/ff/net.2/Add_output_0.out17_3_92_bfp.out25_52": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 518656 }, "/transformer_blocks.9/Add_3_output_0.out10_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 520192 }, "/transformer_blocks.10/norm1/norm/LayerNormalization_output_0.out14_40_bfp.out15_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 523264 }, "/transformer_blocks.10/norm1/Add_2_output_0.out0_0_40_bfp.out1_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 524800 }, "/transformer_blocks.9/norm2_context/LayerNormalization_output_0.out14_39_bfp.out15_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 526336 }, "/transformer_blocks.9/Add_6_output_0.out0_0_39_bfp.out1_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 527872 }, "/transformer_blocks.9/ff_context/net.0/Mul_5_output_0.out17_2_19_bfp.out25_51": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 529408 }, "/transformer_blocks.9/ff_context/net.2/Add_output_0.out17_3_93_bfp.out25_53": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 535552 }, "/transformer_blocks.9/Add_7_output_0.out10_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 537088 }, "/transformer_blocks.10/norm1_context/norm/LayerNormalization_output_0.out14_41_bfp.out15_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 540160 }, "/transformer_blocks.10/norm1_context/Add_2_output_0.out0_0_41_bfp.out1_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 541696 }, "/transformer_blocks.10/attn/Concat_output_0.out22_10_bfp.out23_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 543232 }, "/transformer_blocks.10/attn/Concat_1_output_0.out22_10_bfp.out23_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 544768 }, "/transformer_blocks.10/attn/Concat_2_output_0.out22_10_bfp.out23_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 546304 }, "/transformer_blocks.10/attn/Reshape_3_output_0.out22_10_bfp.out27_0_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 547840 }, "/transformer_blocks.10/attn/to_out.0/Add_output_0.out6_1_20_bfp.out7_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 549376 }, "/transformer_blocks.10/Add_output_0.out10_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 550912 }, "/transformer_blocks.10/norm2/LayerNormalization_output_0.out14_42_bfp.out15_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 553984 }, "/transformer_blocks.10/Add_2_output_0.out0_0_42_bfp.out1_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 555520 }, "/transformer_blocks.10/ff/net.0/Mul_5_output_0.out17_2_20_bfp.out25_55": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 557056 }, "/transformer_blocks.10/ff/net.2/Add_output_0.out17_3_101_bfp.out25_57": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 563200 }, "/transformer_blocks.10/Add_3_output_0.out10_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 564736 }, "/transformer_blocks.10/attn/to_add_out/Add_output_0.out6_1_21_bfp.out7_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 567808 }, "/transformer_blocks.10/Add_4_output_0.out10_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 569344 }, "/transformer_blocks.10/norm2_context/LayerNormalization_output_0.out14_43_bfp.out15_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 572416 }, "/transformer_blocks.10/Add_6_output_0.out0_0_43_bfp.out1_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 573952 }, "/transformer_blocks.10/ff_context/net.0/Mul_5_output_0.out17_2_21_bfp.out25_56": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 575488 }, "/transformer_blocks.10/ff_context/net.2/Add_output_0.out17_3_102_bfp.out25_58": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 581632 }, "/transformer_blocks.10/Add_7_output_0.out10_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 583168 }, "/transformer_blocks.11/norm1/norm/LayerNormalization_output_0.out14_44_bfp.out15_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 586240 }, "/transformer_blocks.11/norm1/Add_2_output_0.out0_0_44_bfp.out1_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 587776 }, "/transformer_blocks.11/norm1_context/norm/LayerNormalization_output_0.out14_45_bfp.out15_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 589312 }, "/transformer_blocks.11/norm1_context/Add_2_output_0.out0_0_45_bfp.out1_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 590848 }, "/transformer_blocks.11/attn/Concat_output_0.out22_11_bfp.out23_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 592384 }, "/transformer_blocks.11/attn/Concat_1_output_0.out22_11_bfp.out23_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 593920 }, "/transformer_blocks.11/attn/Concat_2_output_0.out22_11_bfp.out23_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 595456 }, "/transformer_blocks.11/attn/Reshape_3_output_0.out22_11_bfp.out27_0_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 596992 }, "/transformer_blocks.11/attn/to_out.0/Add_output_0.out6_1_22_bfp.out7_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 598528 }, "/transformer_blocks.11/Add_output_0.out10_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 600064 }, "/transformer_blocks.11/norm2/LayerNormalization_output_0.out14_46_bfp.out15_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 603136 }, "/transformer_blocks.11/Add_2_output_0.out0_0_46_bfp.out1_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 604672 }, "/transformer_blocks.11/ff/net.0/Mul_5_output_0.out17_2_22_bfp.out25_60": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 606208 }, "/transformer_blocks.11/ff/net.2/Add_output_0.out17_3_110_bfp.out25_61": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 612352 }, "/transformer_blocks.11/Add_3_output_0.out10_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 613888 }, "pos_embed.proj.weight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 135168 ], "size_in_bytes": 135168, "op_tensor_size": 135168, "offset": 0, "file_name": "cache/pos_embedprojConv_0.const", "file_size": 135168 }, "pos_embed_input.proj.weight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 135168 ], "size_in_bytes": 135168, "op_tensor_size": 135168, "offset": 135168, "file_name": "cache/pos_embedprojConv_1.const", "file_size": 135168 }, "time_text_embed.timestep_embedder.linear_1.weight_5_1_2": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 448512 ], "size_in_bytes": 448512, "op_tensor_size": 448512, "offset": 270336, "file_name": "cache/pos_embedprojConv_2.const", "file_size": 448512 }, "/time_text_embed/timestep_embedder/act/Sigmoid.weights2_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 128 ], "size_in_bytes": 256, "op_tensor_size": 256, "offset": 718848, "file_name": "cache/pos_embedprojConv_3.const", "file_size": 256 }, "time_text_embed.timestep_embedder.linear_2.weight_5_1_3": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 719104, "file_name": "cache/pos_embedprojConv_4.const", "file_size": 2691072 }, "time_text_embed.text_embedder.linear_1.weight_5_1_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 3588096 ], "size_in_bytes": 3588096, "op_tensor_size": 3588096, "offset": 3410176, "file_name": "cache/pos_embedprojConv_5.const", "file_size": 3588096 }, "/time_text_embed/text_embedder/act_1/Sigmoid.weights2_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 128 ], "size_in_bytes": 256, "op_tensor_size": 256, "offset": 6998272, "file_name": "cache/pos_embedprojConv_6.const", "file_size": 256 }, "time_text_embed.text_embedder.linear_2.weight_5_1_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 6998528, "file_name": "cache/pos_embedprojConv_7.const", "file_size": 2691072 }, "/transformer_blocks.0/norm1/silu/Sigmoid.weights2_2": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 128 ], "size_in_bytes": 256, "op_tensor_size": 256, "offset": 9689600, "file_name": "cache/pos_embedprojConv_8.const", "file_size": 256 }, "encoder_hidden_states.out17_3_0_bfp.wts": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 64 ], "size_in_bytes": 128, "op_tensor_size": 128, "offset": 9689856, "file_name": "cache/pos_embedprojConv_9.const", "file_size": 128 }, "onnx::MatMul_3779": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 7274496 ], "size_in_bytes": 7274496, "op_tensor_size": 7274496, "offset": 9689984, "file_name": "cache/pos_embedprojConv_10.const", "file_size": 7274496 }, "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_bfp.wts": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 64 ], "size_in_bytes": 128, "op_tensor_size": 128, "offset": 16964480, "file_name": "cache/pos_embedprojConv_11.const", "file_size": 128 }, "/transformer_blocks.0/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 16964608, "file_name": "cache/pos_embedprojConv_12.const", "file_size": 3072 }, "/transformer_blocks.0/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 16967680, "file_name": "cache/pos_embedprojConv_13.const", "file_size": 3072 }, "transformer_blocks.0.norm1_context.linear.weight_5_1_4_27_0_1_transformer_blocks.0.norm1_context.linear.weight_5_1_4_27_0_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 16970752, "file_name": "cache/pos_embedprojConv_14.const", "file_size": 5382144 }, "/transformer_blocks.0/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 22352896, "file_name": "cache/pos_embedprojConv_15.const", "file_size": 3072 }, "/transformer_blocks.0/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 22355968, "file_name": "cache/pos_embedprojConv_16.const", "file_size": 3072 }, "transformer_blocks.0.norm1.linear.weight_5_1_5_27_1_1_transformer_blocks.0.norm1.linear.weight_5_1_5_27_1_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 22359040, "file_name": "cache/pos_embedprojConv_17.const", "file_size": 5382144 }, "onnx::MatMul_3783_onnx::MatMul_3780": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 27741184, "file_name": "cache/pos_embedprojConv_18.const", "file_size": 5382144 }, "onnx::MatMul_3784_onnx::MatMul_3781": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 33123328, "file_name": "cache/pos_embedprojConv_19.const", "file_size": 5382144 }, "onnx::MatMul_3785_onnx::MatMul_3782": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 38505472, "file_name": "cache/pos_embedprojConv_20.const", "file_size": 5382144 }, "onnx::MatMul_3800": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 43887616, "file_name": "cache/pos_embedprojConv_21.const", "file_size": 2691072 }, "transformer_blocks.0.norm1_context.linear.weight_5_1_4_27_0_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 46578688, "file_name": "cache/pos_embedprojConv_22.const", "file_size": 2691072 }, "/transformer_blocks.0/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 49269760, "file_name": "cache/pos_embedprojConv_23.const", "file_size": 3072 }, "/transformer_blocks.0/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 49272832, "file_name": "cache/pos_embedprojConv_24.const", "file_size": 3072 }, "transformer_blocks.0.norm1_context.linear.weight_5_1_4_27_0_3_transformer_blocks.0.norm1_context.linear.weight_5_1_4_27_0_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 49275904, "file_name": "cache/pos_embedprojConv_25.const", "file_size": 5382144 }, "onnx::MatMul_3803": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 54658048, "file_name": "cache/pos_embedprojConv_26.const", "file_size": 10764288 }, "onnx::MatMul_3804": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 65422336, "file_name": "cache/pos_embedprojConv_27.const", "file_size": 10764288 }, "transformer_blocks.0.norm1_context.linear.weight_5_1_4_27_0_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 76186624, "file_name": "cache/pos_embedprojConv_28.const", "file_size": 2691072 }, "onnx::MatMul_3799": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 78877696, "file_name": "cache/pos_embedprojConv_29.const", "file_size": 2691072 }, "transformer_blocks.0.norm1.linear.weight_5_1_5_27_1_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 81568768, "file_name": "cache/pos_embedprojConv_30.const", "file_size": 2691072 }, "/transformer_blocks.0/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 84259840, "file_name": "cache/pos_embedprojConv_31.const", "file_size": 3072 }, "/transformer_blocks.0/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 84262912, "file_name": "cache/pos_embedprojConv_32.const", "file_size": 3072 }, "transformer_blocks.0.norm1.linear.weight_5_1_5_27_1_3_transformer_blocks.0.norm1.linear.weight_5_1_5_27_1_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 84265984, "file_name": "cache/pos_embedprojConv_33.const", "file_size": 5382144 }, "onnx::MatMul_3801": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 89648128, "file_name": "cache/pos_embedprojConv_34.const", "file_size": 10764288 }, "onnx::MatMul_3802": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 100412416, "file_name": "cache/pos_embedprojConv_35.const", "file_size": 10764288 }, "transformer_blocks.0.norm1.linear.weight_5_1_5_27_1_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 111176704, "file_name": "cache/pos_embedprojConv_36.const", "file_size": 2691072 }, "/transformer_blocks.1/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 113867776, "file_name": "cache/pos_embedprojConv_37.const", "file_size": 3072 }, "/transformer_blocks.1/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 113870848, "file_name": "cache/pos_embedprojConv_38.const", "file_size": 3072 }, "transformer_blocks.1.norm1.linear.weight_5_1_7_27_3_0_transformer_blocks.1.norm1.linear.weight_5_1_7_27_3_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 113873920, "file_name": "cache/pos_embedprojConv_39.const", "file_size": 5382144 }, "onnx::MatMul_4074": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 119256064, "file_name": "cache/pos_embedprojConv_40.const", "file_size": 2691072 }, "/transformer_blocks.1/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 121947136, "file_name": "cache/pos_embedprojConv_41.const", "file_size": 3072 }, "/transformer_blocks.1/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 121950208, "file_name": "cache/pos_embedprojConv_42.const", "file_size": 3072 }, "transformer_blocks.1.norm1_context.linear.weight_5_1_6_27_2_0_transformer_blocks.1.norm1_context.linear.weight_5_1_6_27_2_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 121953280, "file_name": "cache/pos_embedprojConv_43.const", "file_size": 5382144 }, "onnx::MatMul_3808_onnx::MatMul_3805": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 127335424, "file_name": "cache/pos_embedprojConv_44.const", "file_size": 5382144 }, "onnx::MatMul_3809_onnx::MatMul_3806": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 132717568, "file_name": "cache/pos_embedprojConv_45.const", "file_size": 5382144 }, "onnx::MatMul_3810_onnx::MatMul_3807": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 138099712, "file_name": "cache/pos_embedprojConv_46.const", "file_size": 5382144 }, "onnx::MatMul_3824": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 143481856, "file_name": "cache/pos_embedprojConv_47.const", "file_size": 2691072 }, "transformer_blocks.1.norm1.linear.weight_5_1_7_27_3_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 146172928, "file_name": "cache/pos_embedprojConv_48.const", "file_size": 2691072 }, "/transformer_blocks.1/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 148864000, "file_name": "cache/pos_embedprojConv_49.const", "file_size": 3072 }, "/transformer_blocks.1/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 148867072, "file_name": "cache/pos_embedprojConv_50.const", "file_size": 3072 }, "transformer_blocks.1.norm1.linear.weight_5_1_7_27_3_3_transformer_blocks.1.norm1.linear.weight_5_1_7_27_3_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 148870144, "file_name": "cache/pos_embedprojConv_51.const", "file_size": 5382144 }, "onnx::MatMul_3826": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 154252288, "file_name": "cache/pos_embedprojConv_52.const", "file_size": 10764288 }, "onnx::MatMul_3827": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 165016576, "file_name": "cache/pos_embedprojConv_53.const", "file_size": 10764288 }, "transformer_blocks.1.norm1.linear.weight_5_1_7_27_3_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 175780864, "file_name": "cache/pos_embedprojConv_54.const", "file_size": 2691072 }, "onnx::MatMul_4075": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 178471936, "file_name": "cache/pos_embedprojConv_55.const", "file_size": 2691072 }, "onnx::MatMul_3825": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 181163008, "file_name": "cache/pos_embedprojConv_56.const", "file_size": 2691072 }, "transformer_blocks.1.norm1_context.linear.weight_5_1_6_27_2_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 183854080, "file_name": "cache/pos_embedprojConv_57.const", "file_size": 2691072 }, "/transformer_blocks.1/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 186545152, "file_name": "cache/pos_embedprojConv_58.const", "file_size": 3072 }, "/transformer_blocks.1/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 186548224, "file_name": "cache/pos_embedprojConv_59.const", "file_size": 3072 }, "transformer_blocks.1.norm1_context.linear.weight_5_1_6_27_2_3_transformer_blocks.1.norm1_context.linear.weight_5_1_6_27_2_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 186551296, "file_name": "cache/pos_embedprojConv_60.const", "file_size": 5382144 }, "onnx::MatMul_3828": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 191933440, "file_name": "cache/pos_embedprojConv_61.const", "file_size": 10764288 }, "onnx::MatMul_3829": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 202697728, "file_name": "cache/pos_embedprojConv_62.const", "file_size": 10764288 }, "transformer_blocks.1.norm1_context.linear.weight_5_1_6_27_2_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 213462016, "file_name": "cache/pos_embedprojConv_63.const", "file_size": 2691072 }, "/transformer_blocks.2/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 216153088, "file_name": "cache/pos_embedprojConv_64.const", "file_size": 3072 }, "/transformer_blocks.2/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 216156160, "file_name": "cache/pos_embedprojConv_65.const", "file_size": 3072 }, "/transformer_blocks.2/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 216159232, "file_name": "cache/pos_embedprojConv_66.const", "file_size": 3072 }, "/transformer_blocks.2/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 216162304, "file_name": "cache/pos_embedprojConv_67.const", "file_size": 3072 }, "transformer_blocks.2.norm1.linear.weight_5_1_9_27_5_0_transformer_blocks.2.norm1.linear.weight_5_1_9_27_5_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 216165376, "file_name": "cache/pos_embedprojConv_68.const", "file_size": 5382144 }, "transformer_blocks.2.norm1_context.linear.weight_5_1_8_27_4_0_transformer_blocks.2.norm1_context.linear.weight_5_1_8_27_4_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 221547520, "file_name": "cache/pos_embedprojConv_69.const", "file_size": 5382144 }, "onnx::MatMul_3833_onnx::MatMul_3830": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 226929664, "file_name": "cache/pos_embedprojConv_70.const", "file_size": 5382144 }, "onnx::MatMul_3834_onnx::MatMul_3831": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 232311808, "file_name": "cache/pos_embedprojConv_71.const", "file_size": 5382144 }, "onnx::MatMul_3835_onnx::MatMul_3832": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 237693952, "file_name": "cache/pos_embedprojConv_72.const", "file_size": 5382144 }, "onnx::MatMul_3850": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 243076096, "file_name": "cache/pos_embedprojConv_73.const", "file_size": 2691072 }, "transformer_blocks.2.norm1_context.linear.weight_5_1_8_27_4_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 245767168, "file_name": "cache/pos_embedprojConv_74.const", "file_size": 2691072 }, "onnx::MatMul_3849": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 248458240, "file_name": "cache/pos_embedprojConv_75.const", "file_size": 2691072 }, "transformer_blocks.2.norm1.linear.weight_5_1_9_27_5_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 251149312, "file_name": "cache/pos_embedprojConv_76.const", "file_size": 2691072 }, "/transformer_blocks.2/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 253840384, "file_name": "cache/pos_embedprojConv_77.const", "file_size": 3072 }, "/transformer_blocks.2/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 253843456, "file_name": "cache/pos_embedprojConv_78.const", "file_size": 3072 }, "transformer_blocks.2.norm1.linear.weight_5_1_9_27_5_3_transformer_blocks.2.norm1.linear.weight_5_1_9_27_5_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 253846528, "file_name": "cache/pos_embedprojConv_79.const", "file_size": 5382144 }, "onnx::MatMul_3851": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 259228672, "file_name": "cache/pos_embedprojConv_80.const", "file_size": 10764288 }, "onnx::MatMul_3852": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 269992960, "file_name": "cache/pos_embedprojConv_81.const", "file_size": 10764288 }, "transformer_blocks.2.norm1.linear.weight_5_1_9_27_5_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 280757248, "file_name": "cache/pos_embedprojConv_82.const", "file_size": 2691072 }, "onnx::MatMul_4076": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 283448320, "file_name": "cache/pos_embedprojConv_83.const", "file_size": 2691072 }, "/transformer_blocks.2/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 286139392, "file_name": "cache/pos_embedprojConv_84.const", "file_size": 3072 }, "/transformer_blocks.2/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 286142464, "file_name": "cache/pos_embedprojConv_85.const", "file_size": 3072 }, "transformer_blocks.2.norm1_context.linear.weight_5_1_8_27_4_3_transformer_blocks.2.norm1_context.linear.weight_5_1_8_27_4_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 286145536, "file_name": "cache/pos_embedprojConv_86.const", "file_size": 5382144 }, "onnx::MatMul_3853": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 291527680, "file_name": "cache/pos_embedprojConv_87.const", "file_size": 10764288 }, "onnx::MatMul_3854": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 302291968, "file_name": "cache/pos_embedprojConv_88.const", "file_size": 10764288 }, "transformer_blocks.2.norm1_context.linear.weight_5_1_8_27_4_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 313056256, "file_name": "cache/pos_embedprojConv_89.const", "file_size": 2691072 }, "/transformer_blocks.3/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 315747328, "file_name": "cache/pos_embedprojConv_90.const", "file_size": 3072 }, "/transformer_blocks.3/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 315750400, "file_name": "cache/pos_embedprojConv_91.const", "file_size": 3072 }, "/transformer_blocks.3/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 315753472, "file_name": "cache/pos_embedprojConv_92.const", "file_size": 3072 }, "/transformer_blocks.3/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 315756544, "file_name": "cache/pos_embedprojConv_93.const", "file_size": 3072 }, "transformer_blocks.3.norm1.linear.weight_5_1_11_27_7_0_transformer_blocks.3.norm1.linear.weight_5_1_11_27_7_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 315759616, "file_name": "cache/pos_embedprojConv_94.const", "file_size": 5382144 }, "transformer_blocks.3.norm1_context.linear.weight_5_1_10_27_6_0_transformer_blocks.3.norm1_context.linear.weight_5_1_10_27_6_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 321141760, "file_name": "cache/pos_embedprojConv_95.const", "file_size": 5382144 }, "onnx::MatMul_3858_onnx::MatMul_3855": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 326523904, "file_name": "cache/pos_embedprojConv_96.const", "file_size": 5382144 }, "onnx::MatMul_3859_onnx::MatMul_3856": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 331906048, "file_name": "cache/pos_embedprojConv_97.const", "file_size": 5382144 }, "onnx::MatMul_3860_onnx::MatMul_3857": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 337288192, "file_name": "cache/pos_embedprojConv_98.const", "file_size": 5382144 }, "onnx::MatMul_3875": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 342670336, "file_name": "cache/pos_embedprojConv_99.const", "file_size": 2691072 }, "transformer_blocks.3.norm1_context.linear.weight_5_1_10_27_6_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 345361408, "file_name": "cache/pos_embedprojConv_100.const", "file_size": 2691072 }, "onnx::MatMul_3874": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 348052480, "file_name": "cache/pos_embedprojConv_101.const", "file_size": 2691072 }, "transformer_blocks.3.norm1.linear.weight_5_1_11_27_7_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 350743552, "file_name": "cache/pos_embedprojConv_102.const", "file_size": 2691072 }, "/transformer_blocks.3/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 353434624, "file_name": "cache/pos_embedprojConv_103.const", "file_size": 3072 }, "/transformer_blocks.3/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 353437696, "file_name": "cache/pos_embedprojConv_104.const", "file_size": 3072 }, "transformer_blocks.3.norm1.linear.weight_5_1_11_27_7_3_transformer_blocks.3.norm1.linear.weight_5_1_11_27_7_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 353440768, "file_name": "cache/pos_embedprojConv_105.const", "file_size": 5382144 }, "onnx::MatMul_3876": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 358822912, "file_name": "cache/pos_embedprojConv_106.const", "file_size": 10764288 }, "onnx::MatMul_3877": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 369587200, "file_name": "cache/pos_embedprojConv_107.const", "file_size": 10764288 }, "transformer_blocks.3.norm1.linear.weight_5_1_11_27_7_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 380351488, "file_name": "cache/pos_embedprojConv_108.const", "file_size": 2691072 }, "onnx::MatMul_4077": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 383042560, "file_name": "cache/pos_embedprojConv_109.const", "file_size": 2691072 }, "/transformer_blocks.3/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 385733632, "file_name": "cache/pos_embedprojConv_110.const", "file_size": 3072 }, "/transformer_blocks.3/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 385736704, "file_name": "cache/pos_embedprojConv_111.const", "file_size": 3072 }, "transformer_blocks.3.norm1_context.linear.weight_5_1_10_27_6_3_transformer_blocks.3.norm1_context.linear.weight_5_1_10_27_6_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 385739776, "file_name": "cache/pos_embedprojConv_112.const", "file_size": 5382144 }, "onnx::MatMul_3878": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 391121920, "file_name": "cache/pos_embedprojConv_113.const", "file_size": 10764288 }, "onnx::MatMul_3879": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 401886208, "file_name": "cache/pos_embedprojConv_114.const", "file_size": 10764288 }, "transformer_blocks.3.norm1_context.linear.weight_5_1_10_27_6_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 412650496, "file_name": "cache/pos_embedprojConv_115.const", "file_size": 2691072 }, "/transformer_blocks.4/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 415341568, "file_name": "cache/pos_embedprojConv_116.const", "file_size": 3072 }, "/transformer_blocks.4/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 415344640, "file_name": "cache/pos_embedprojConv_117.const", "file_size": 3072 }, "/transformer_blocks.4/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 415347712, "file_name": "cache/pos_embedprojConv_118.const", "file_size": 3072 }, "/transformer_blocks.4/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 415350784, "file_name": "cache/pos_embedprojConv_119.const", "file_size": 3072 }, "transformer_blocks.4.norm1.linear.weight_5_1_13_27_9_0_transformer_blocks.4.norm1.linear.weight_5_1_13_27_9_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 415353856, "file_name": "cache/pos_embedprojConv_120.const", "file_size": 5382144 }, "transformer_blocks.4.norm1_context.linear.weight_5_1_12_27_8_0_transformer_blocks.4.norm1_context.linear.weight_5_1_12_27_8_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 420736000, "file_name": "cache/pos_embedprojConv_121.const", "file_size": 5382144 }, "onnx::MatMul_3883_onnx::MatMul_3880": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 426118144, "file_name": "cache/pos_embedprojConv_122.const", "file_size": 5382144 }, "onnx::MatMul_3884_onnx::MatMul_3881": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 431500288, "file_name": "cache/pos_embedprojConv_123.const", "file_size": 5382144 }, "onnx::MatMul_3885_onnx::MatMul_3882": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 436882432, "file_name": "cache/pos_embedprojConv_124.const", "file_size": 5382144 }, "onnx::MatMul_3900": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 442264576, "file_name": "cache/pos_embedprojConv_125.const", "file_size": 2691072 }, "transformer_blocks.4.norm1_context.linear.weight_5_1_12_27_8_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 444955648, "file_name": "cache/pos_embedprojConv_126.const", "file_size": 2691072 }, "onnx::MatMul_3899": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 447646720, "file_name": "cache/pos_embedprojConv_127.const", "file_size": 2691072 }, "transformer_blocks.4.norm1.linear.weight_5_1_13_27_9_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 450337792, "file_name": "cache/pos_embedprojConv_128.const", "file_size": 2691072 }, "/transformer_blocks.4/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 453028864, "file_name": "cache/pos_embedprojConv_129.const", "file_size": 3072 }, "/transformer_blocks.4/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 453031936, "file_name": "cache/pos_embedprojConv_130.const", "file_size": 3072 }, "transformer_blocks.4.norm1.linear.weight_5_1_13_27_9_3_transformer_blocks.4.norm1.linear.weight_5_1_13_27_9_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 453035008, "file_name": "cache/pos_embedprojConv_131.const", "file_size": 5382144 }, "onnx::MatMul_3901": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 458417152, "file_name": "cache/pos_embedprojConv_132.const", "file_size": 10764288 }, "onnx::MatMul_3902": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 469181440, "file_name": "cache/pos_embedprojConv_133.const", "file_size": 10764288 }, "transformer_blocks.4.norm1.linear.weight_5_1_13_27_9_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 479945728, "file_name": "cache/pos_embedprojConv_134.const", "file_size": 2691072 }, "onnx::MatMul_4078": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 482636800, "file_name": "cache/pos_embedprojConv_135.const", "file_size": 2691072 }, "/transformer_blocks.4/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 485327872, "file_name": "cache/pos_embedprojConv_136.const", "file_size": 3072 }, "/transformer_blocks.4/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 485330944, "file_name": "cache/pos_embedprojConv_137.const", "file_size": 3072 }, "transformer_blocks.4.norm1_context.linear.weight_5_1_12_27_8_3_transformer_blocks.4.norm1_context.linear.weight_5_1_12_27_8_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 485334016, "file_name": "cache/pos_embedprojConv_138.const", "file_size": 5382144 }, "onnx::MatMul_3903": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 490716160, "file_name": "cache/pos_embedprojConv_139.const", "file_size": 10764288 }, "onnx::MatMul_3904": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 501480448, "file_name": "cache/pos_embedprojConv_140.const", "file_size": 10764288 }, "transformer_blocks.4.norm1_context.linear.weight_5_1_12_27_8_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 512244736, "file_name": "cache/pos_embedprojConv_141.const", "file_size": 2691072 }, "/transformer_blocks.5/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 514935808, "file_name": "cache/pos_embedprojConv_142.const", "file_size": 3072 }, "/transformer_blocks.5/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 514938880, "file_name": "cache/pos_embedprojConv_143.const", "file_size": 3072 }, "/transformer_blocks.5/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 514941952, "file_name": "cache/pos_embedprojConv_144.const", "file_size": 3072 }, "/transformer_blocks.5/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 514945024, "file_name": "cache/pos_embedprojConv_145.const", "file_size": 3072 }, "transformer_blocks.5.norm1.linear.weight_5_1_15_27_11_0_transformer_blocks.5.norm1.linear.weight_5_1_15_27_11_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 514948096, "file_name": "cache/pos_embedprojConv_146.const", "file_size": 5382144 }, "transformer_blocks.5.norm1_context.linear.weight_5_1_14_27_10_0_transformer_blocks.5.norm1_context.linear.weight_5_1_14_27_10_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 520330240, "file_name": "cache/pos_embedprojConv_147.const", "file_size": 5382144 }, "onnx::MatMul_3908_onnx::MatMul_3905": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 525712384, "file_name": "cache/pos_embedprojConv_148.const", "file_size": 5382144 }, "onnx::MatMul_3909_onnx::MatMul_3906": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 531094528, "file_name": "cache/pos_embedprojConv_149.const", "file_size": 5382144 }, "onnx::MatMul_3910_onnx::MatMul_3907": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 536476672, "file_name": "cache/pos_embedprojConv_150.const", "file_size": 5382144 }, "onnx::MatMul_3925": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 541858816, "file_name": "cache/pos_embedprojConv_151.const", "file_size": 2691072 }, "transformer_blocks.5.norm1_context.linear.weight_5_1_14_27_10_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 544549888, "file_name": "cache/pos_embedprojConv_152.const", "file_size": 2691072 }, "onnx::MatMul_3924": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 547240960, "file_name": "cache/pos_embedprojConv_153.const", "file_size": 2691072 }, "transformer_blocks.5.norm1.linear.weight_5_1_15_27_11_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 549932032, "file_name": "cache/pos_embedprojConv_154.const", "file_size": 2691072 }, "/transformer_blocks.5/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 552623104, "file_name": "cache/pos_embedprojConv_155.const", "file_size": 3072 }, "/transformer_blocks.5/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 552626176, "file_name": "cache/pos_embedprojConv_156.const", "file_size": 3072 }, "transformer_blocks.5.norm1.linear.weight_5_1_15_27_11_3_transformer_blocks.5.norm1.linear.weight_5_1_15_27_11_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 552629248, "file_name": "cache/pos_embedprojConv_157.const", "file_size": 5382144 }, "onnx::MatMul_3926": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 558011392, "file_name": "cache/pos_embedprojConv_158.const", "file_size": 10764288 }, "onnx::MatMul_3927": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 568775680, "file_name": "cache/pos_embedprojConv_159.const", "file_size": 10764288 }, "transformer_blocks.5.norm1.linear.weight_5_1_15_27_11_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 579539968, "file_name": "cache/pos_embedprojConv_160.const", "file_size": 2691072 }, "onnx::MatMul_4079": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 582231040, "file_name": "cache/pos_embedprojConv_161.const", "file_size": 2691072 }, "/transformer_blocks.5/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 584922112, "file_name": "cache/pos_embedprojConv_162.const", "file_size": 3072 }, "/transformer_blocks.5/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 584925184, "file_name": "cache/pos_embedprojConv_163.const", "file_size": 3072 }, "transformer_blocks.5.norm1_context.linear.weight_5_1_14_27_10_3_transformer_blocks.5.norm1_context.linear.weight_5_1_14_27_10_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 584928256, "file_name": "cache/pos_embedprojConv_164.const", "file_size": 5382144 }, "onnx::MatMul_3928": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 590310400, "file_name": "cache/pos_embedprojConv_165.const", "file_size": 10764288 }, "onnx::MatMul_3929": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 601074688, "file_name": "cache/pos_embedprojConv_166.const", "file_size": 10764288 }, "transformer_blocks.5.norm1_context.linear.weight_5_1_14_27_10_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 611838976, "file_name": "cache/pos_embedprojConv_167.const", "file_size": 2691072 }, "/transformer_blocks.6/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 614530048, "file_name": "cache/pos_embedprojConv_168.const", "file_size": 3072 }, "/transformer_blocks.6/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 614533120, "file_name": "cache/pos_embedprojConv_169.const", "file_size": 3072 }, "/transformer_blocks.6/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 614536192, "file_name": "cache/pos_embedprojConv_170.const", "file_size": 3072 }, "/transformer_blocks.6/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 614539264, "file_name": "cache/pos_embedprojConv_171.const", "file_size": 3072 }, "transformer_blocks.6.norm1.linear.weight_5_1_17_27_13_0_transformer_blocks.6.norm1.linear.weight_5_1_17_27_13_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 614542336, "file_name": "cache/pos_embedprojConv_172.const", "file_size": 5382144 }, "transformer_blocks.6.norm1_context.linear.weight_5_1_16_27_12_0_transformer_blocks.6.norm1_context.linear.weight_5_1_16_27_12_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 619924480, "file_name": "cache/pos_embedprojConv_173.const", "file_size": 5382144 }, "onnx::MatMul_3933_onnx::MatMul_3930": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 625306624, "file_name": "cache/pos_embedprojConv_174.const", "file_size": 5382144 }, "onnx::MatMul_3934_onnx::MatMul_3931": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 630688768, "file_name": "cache/pos_embedprojConv_175.const", "file_size": 5382144 }, "onnx::MatMul_3935_onnx::MatMul_3932": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 636070912, "file_name": "cache/pos_embedprojConv_176.const", "file_size": 5382144 }, "onnx::MatMul_3950": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 641453056, "file_name": "cache/pos_embedprojConv_177.const", "file_size": 2691072 }, "transformer_blocks.6.norm1_context.linear.weight_5_1_16_27_12_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 644144128, "file_name": "cache/pos_embedprojConv_178.const", "file_size": 2691072 }, "onnx::MatMul_3949": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 646835200, "file_name": "cache/pos_embedprojConv_179.const", "file_size": 2691072 }, "transformer_blocks.6.norm1.linear.weight_5_1_17_27_13_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 649526272, "file_name": "cache/pos_embedprojConv_180.const", "file_size": 2691072 }, "/transformer_blocks.6/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 652217344, "file_name": "cache/pos_embedprojConv_181.const", "file_size": 3072 }, "/transformer_blocks.6/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 652220416, "file_name": "cache/pos_embedprojConv_182.const", "file_size": 3072 }, "transformer_blocks.6.norm1.linear.weight_5_1_17_27_13_3_transformer_blocks.6.norm1.linear.weight_5_1_17_27_13_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 652223488, "file_name": "cache/pos_embedprojConv_183.const", "file_size": 5382144 }, "onnx::MatMul_3951": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 657605632, "file_name": "cache/pos_embedprojConv_184.const", "file_size": 10764288 }, "onnx::MatMul_3952": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 668369920, "file_name": "cache/pos_embedprojConv_185.const", "file_size": 10764288 }, "transformer_blocks.6.norm1.linear.weight_5_1_17_27_13_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 679134208, "file_name": "cache/pos_embedprojConv_186.const", "file_size": 2691072 }, "onnx::MatMul_4080": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 681825280, "file_name": "cache/pos_embedprojConv_187.const", "file_size": 2691072 }, "/transformer_blocks.6/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 684516352, "file_name": "cache/pos_embedprojConv_188.const", "file_size": 3072 }, "/transformer_blocks.6/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 684519424, "file_name": "cache/pos_embedprojConv_189.const", "file_size": 3072 }, "transformer_blocks.6.norm1_context.linear.weight_5_1_16_27_12_3_transformer_blocks.6.norm1_context.linear.weight_5_1_16_27_12_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 684522496, "file_name": "cache/pos_embedprojConv_190.const", "file_size": 5382144 }, "onnx::MatMul_3953": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 689904640, "file_name": "cache/pos_embedprojConv_191.const", "file_size": 10764288 }, "onnx::MatMul_3954": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 700668928, "file_name": "cache/pos_embedprojConv_192.const", "file_size": 10764288 }, "transformer_blocks.6.norm1_context.linear.weight_5_1_16_27_12_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 711433216, "file_name": "cache/pos_embedprojConv_193.const", "file_size": 2691072 }, "/transformer_blocks.7/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 714124288, "file_name": "cache/pos_embedprojConv_194.const", "file_size": 3072 }, "/transformer_blocks.7/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 714127360, "file_name": "cache/pos_embedprojConv_195.const", "file_size": 3072 }, "/transformer_blocks.7/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 714130432, "file_name": "cache/pos_embedprojConv_196.const", "file_size": 3072 }, "/transformer_blocks.7/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 714133504, "file_name": "cache/pos_embedprojConv_197.const", "file_size": 3072 }, "transformer_blocks.7.norm1.linear.weight_5_1_19_27_15_0_transformer_blocks.7.norm1.linear.weight_5_1_19_27_15_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 714136576, "file_name": "cache/pos_embedprojConv_198.const", "file_size": 5382144 }, "transformer_blocks.7.norm1_context.linear.weight_5_1_18_27_14_0_transformer_blocks.7.norm1_context.linear.weight_5_1_18_27_14_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 719518720, "file_name": "cache/pos_embedprojConv_199.const", "file_size": 5382144 }, "onnx::MatMul_3958_onnx::MatMul_3955": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 724900864, "file_name": "cache/pos_embedprojConv_200.const", "file_size": 5382144 }, "onnx::MatMul_3959_onnx::MatMul_3956": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 730283008, "file_name": "cache/pos_embedprojConv_201.const", "file_size": 5382144 }, "onnx::MatMul_3960_onnx::MatMul_3957": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 735665152, "file_name": "cache/pos_embedprojConv_202.const", "file_size": 5382144 }, "onnx::MatMul_3975": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 741047296, "file_name": "cache/pos_embedprojConv_203.const", "file_size": 2691072 }, "transformer_blocks.7.norm1_context.linear.weight_5_1_18_27_14_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 743738368, "file_name": "cache/pos_embedprojConv_204.const", "file_size": 2691072 }, "onnx::MatMul_3974": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 746429440, "file_name": "cache/pos_embedprojConv_205.const", "file_size": 2691072 }, "transformer_blocks.7.norm1.linear.weight_5_1_19_27_15_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 749120512, "file_name": "cache/pos_embedprojConv_206.const", "file_size": 2691072 }, "/transformer_blocks.7/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 751811584, "file_name": "cache/pos_embedprojConv_207.const", "file_size": 3072 }, "/transformer_blocks.7/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 751814656, "file_name": "cache/pos_embedprojConv_208.const", "file_size": 3072 }, "transformer_blocks.7.norm1.linear.weight_5_1_19_27_15_3_transformer_blocks.7.norm1.linear.weight_5_1_19_27_15_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 751817728, "file_name": "cache/pos_embedprojConv_209.const", "file_size": 5382144 }, "onnx::MatMul_3976": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 757199872, "file_name": "cache/pos_embedprojConv_210.const", "file_size": 10764288 }, "onnx::MatMul_3977": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 767964160, "file_name": "cache/pos_embedprojConv_211.const", "file_size": 10764288 }, "transformer_blocks.7.norm1.linear.weight_5_1_19_27_15_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 778728448, "file_name": "cache/pos_embedprojConv_212.const", "file_size": 2691072 }, "onnx::MatMul_4081": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 781419520, "file_name": "cache/pos_embedprojConv_213.const", "file_size": 2691072 }, "/transformer_blocks.7/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 784110592, "file_name": "cache/pos_embedprojConv_214.const", "file_size": 3072 }, "/transformer_blocks.7/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 784113664, "file_name": "cache/pos_embedprojConv_215.const", "file_size": 3072 }, "transformer_blocks.7.norm1_context.linear.weight_5_1_18_27_14_3_transformer_blocks.7.norm1_context.linear.weight_5_1_18_27_14_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 784116736, "file_name": "cache/pos_embedprojConv_216.const", "file_size": 5382144 }, "onnx::MatMul_3978": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 789498880, "file_name": "cache/pos_embedprojConv_217.const", "file_size": 10764288 }, "onnx::MatMul_3979": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 800263168, "file_name": "cache/pos_embedprojConv_218.const", "file_size": 10764288 }, "transformer_blocks.7.norm1_context.linear.weight_5_1_18_27_14_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 811027456, "file_name": "cache/pos_embedprojConv_219.const", "file_size": 2691072 }, "/transformer_blocks.8/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 813718528, "file_name": "cache/pos_embedprojConv_220.const", "file_size": 3072 }, "/transformer_blocks.8/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 813721600, "file_name": "cache/pos_embedprojConv_221.const", "file_size": 3072 }, "/transformer_blocks.8/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 813724672, "file_name": "cache/pos_embedprojConv_222.const", "file_size": 3072 }, "/transformer_blocks.8/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 813727744, "file_name": "cache/pos_embedprojConv_223.const", "file_size": 3072 }, "transformer_blocks.8.norm1.linear.weight_5_1_21_27_17_0_transformer_blocks.8.norm1.linear.weight_5_1_21_27_17_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 813730816, "file_name": "cache/pos_embedprojConv_224.const", "file_size": 5382144 }, "transformer_blocks.8.norm1_context.linear.weight_5_1_20_27_16_0_transformer_blocks.8.norm1_context.linear.weight_5_1_20_27_16_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 819112960, "file_name": "cache/pos_embedprojConv_225.const", "file_size": 5382144 }, "onnx::MatMul_3983_onnx::MatMul_3980": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 824495104, "file_name": "cache/pos_embedprojConv_226.const", "file_size": 5382144 }, "onnx::MatMul_3984_onnx::MatMul_3981": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 829877248, "file_name": "cache/pos_embedprojConv_227.const", "file_size": 5382144 }, "onnx::MatMul_3985_onnx::MatMul_3982": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 835259392, "file_name": "cache/pos_embedprojConv_228.const", "file_size": 5382144 }, "onnx::MatMul_4000": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 840641536, "file_name": "cache/pos_embedprojConv_229.const", "file_size": 2691072 }, "transformer_blocks.8.norm1_context.linear.weight_5_1_20_27_16_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 843332608, "file_name": "cache/pos_embedprojConv_230.const", "file_size": 2691072 }, "onnx::MatMul_3999": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 846023680, "file_name": "cache/pos_embedprojConv_231.const", "file_size": 2691072 }, "transformer_blocks.8.norm1.linear.weight_5_1_21_27_17_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 848714752, "file_name": "cache/pos_embedprojConv_232.const", "file_size": 2691072 }, "/transformer_blocks.8/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 851405824, "file_name": "cache/pos_embedprojConv_233.const", "file_size": 3072 }, "/transformer_blocks.8/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 851408896, "file_name": "cache/pos_embedprojConv_234.const", "file_size": 3072 }, "transformer_blocks.8.norm1.linear.weight_5_1_21_27_17_3_transformer_blocks.8.norm1.linear.weight_5_1_21_27_17_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 851411968, "file_name": "cache/pos_embedprojConv_235.const", "file_size": 5382144 }, "onnx::MatMul_4001": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 856794112, "file_name": "cache/pos_embedprojConv_236.const", "file_size": 10764288 }, "onnx::MatMul_4002": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 867558400, "file_name": "cache/pos_embedprojConv_237.const", "file_size": 10764288 }, "transformer_blocks.8.norm1.linear.weight_5_1_21_27_17_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 878322688, "file_name": "cache/pos_embedprojConv_238.const", "file_size": 2691072 }, "onnx::MatMul_4082": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 881013760, "file_name": "cache/pos_embedprojConv_239.const", "file_size": 2691072 }, "/transformer_blocks.8/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 883704832, "file_name": "cache/pos_embedprojConv_240.const", "file_size": 3072 }, "/transformer_blocks.8/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 883707904, "file_name": "cache/pos_embedprojConv_241.const", "file_size": 3072 }, "transformer_blocks.8.norm1_context.linear.weight_5_1_20_27_16_3_transformer_blocks.8.norm1_context.linear.weight_5_1_20_27_16_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 883710976, "file_name": "cache/pos_embedprojConv_242.const", "file_size": 5382144 }, "onnx::MatMul_4003": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 889093120, "file_name": "cache/pos_embedprojConv_243.const", "file_size": 10764288 }, "onnx::MatMul_4004": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 899857408, "file_name": "cache/pos_embedprojConv_244.const", "file_size": 10764288 }, "transformer_blocks.8.norm1_context.linear.weight_5_1_20_27_16_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 910621696, "file_name": "cache/pos_embedprojConv_245.const", "file_size": 2691072 }, "/transformer_blocks.9/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 913312768, "file_name": "cache/pos_embedprojConv_246.const", "file_size": 3072 }, "/transformer_blocks.9/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 913315840, "file_name": "cache/pos_embedprojConv_247.const", "file_size": 3072 }, "/transformer_blocks.9/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 913318912, "file_name": "cache/pos_embedprojConv_248.const", "file_size": 3072 }, "/transformer_blocks.9/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 913321984, "file_name": "cache/pos_embedprojConv_249.const", "file_size": 3072 }, "transformer_blocks.9.norm1.linear.weight_5_1_23_27_19_0_transformer_blocks.9.norm1.linear.weight_5_1_23_27_19_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 913325056, "file_name": "cache/pos_embedprojConv_250.const", "file_size": 5382144 }, "transformer_blocks.9.norm1_context.linear.weight_5_1_22_27_18_0_transformer_blocks.9.norm1_context.linear.weight_5_1_22_27_18_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 918707200, "file_name": "cache/pos_embedprojConv_251.const", "file_size": 5382144 }, "onnx::MatMul_4008_onnx::MatMul_4005": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 924089344, "file_name": "cache/pos_embedprojConv_252.const", "file_size": 5382144 }, "onnx::MatMul_4009_onnx::MatMul_4006": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 929471488, "file_name": "cache/pos_embedprojConv_253.const", "file_size": 5382144 }, "onnx::MatMul_4010_onnx::MatMul_4007": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 934853632, "file_name": "cache/pos_embedprojConv_254.const", "file_size": 5382144 }, "onnx::MatMul_4025": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 940235776, "file_name": "cache/pos_embedprojConv_255.const", "file_size": 2691072 }, "transformer_blocks.9.norm1_context.linear.weight_5_1_22_27_18_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 942926848, "file_name": "cache/pos_embedprojConv_256.const", "file_size": 2691072 }, "onnx::MatMul_4024": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 945617920, "file_name": "cache/pos_embedprojConv_257.const", "file_size": 2691072 }, "transformer_blocks.9.norm1.linear.weight_5_1_23_27_19_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 948308992, "file_name": "cache/pos_embedprojConv_258.const", "file_size": 2691072 }, "/transformer_blocks.9/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 951000064, "file_name": "cache/pos_embedprojConv_259.const", "file_size": 3072 }, "/transformer_blocks.9/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 951003136, "file_name": "cache/pos_embedprojConv_260.const", "file_size": 3072 }, "transformer_blocks.9.norm1.linear.weight_5_1_23_27_19_3_transformer_blocks.9.norm1.linear.weight_5_1_23_27_19_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 951006208, "file_name": "cache/pos_embedprojConv_261.const", "file_size": 5382144 }, "onnx::MatMul_4026": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 956388352, "file_name": "cache/pos_embedprojConv_262.const", "file_size": 10764288 }, "onnx::MatMul_4027": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 967152640, "file_name": "cache/pos_embedprojConv_263.const", "file_size": 10764288 }, "transformer_blocks.9.norm1.linear.weight_5_1_23_27_19_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 977916928, "file_name": "cache/pos_embedprojConv_264.const", "file_size": 2691072 }, "/transformer_blocks.10/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 980608000, "file_name": "cache/pos_embedprojConv_265.const", "file_size": 3072 }, "/transformer_blocks.10/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 980611072, "file_name": "cache/pos_embedprojConv_266.const", "file_size": 3072 }, "transformer_blocks.10.norm1.linear.weight_5_1_25_27_21_0_transformer_blocks.10.norm1.linear.weight_5_1_25_27_21_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 980614144, "file_name": "cache/pos_embedprojConv_267.const", "file_size": 5382144 }, "onnx::MatMul_4083": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 985996288, "file_name": "cache/pos_embedprojConv_268.const", "file_size": 2691072 }, "/transformer_blocks.9/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 988687360, "file_name": "cache/pos_embedprojConv_269.const", "file_size": 3072 }, "/transformer_blocks.9/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 988690432, "file_name": "cache/pos_embedprojConv_270.const", "file_size": 3072 }, "transformer_blocks.9.norm1_context.linear.weight_5_1_22_27_18_3_transformer_blocks.9.norm1_context.linear.weight_5_1_22_27_18_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 988693504, "file_name": "cache/pos_embedprojConv_271.const", "file_size": 5382144 }, "onnx::MatMul_4028": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 994075648, "file_name": "cache/pos_embedprojConv_272.const", "file_size": 10764288 }, "onnx::MatMul_4029": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1004839936, "file_name": "cache/pos_embedprojConv_273.const", "file_size": 10764288 }, "transformer_blocks.9.norm1_context.linear.weight_5_1_22_27_18_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1015604224, "file_name": "cache/pos_embedprojConv_274.const", "file_size": 2691072 }, "/transformer_blocks.10/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1018295296, "file_name": "cache/pos_embedprojConv_275.const", "file_size": 3072 }, "/transformer_blocks.10/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1018298368, "file_name": "cache/pos_embedprojConv_276.const", "file_size": 3072 }, "transformer_blocks.10.norm1_context.linear.weight_5_1_24_27_20_0_transformer_blocks.10.norm1_context.linear.weight_5_1_24_27_20_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1018301440, "file_name": "cache/pos_embedprojConv_277.const", "file_size": 5382144 }, "onnx::MatMul_4033_onnx::MatMul_4030": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1023683584, "file_name": "cache/pos_embedprojConv_278.const", "file_size": 5382144 }, "onnx::MatMul_4034_onnx::MatMul_4031": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1029065728, "file_name": "cache/pos_embedprojConv_279.const", "file_size": 5382144 }, "onnx::MatMul_4035_onnx::MatMul_4032": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1034447872, "file_name": "cache/pos_embedprojConv_280.const", "file_size": 5382144 }, "onnx::MatMul_4049": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1039830016, "file_name": "cache/pos_embedprojConv_281.const", "file_size": 2691072 }, "transformer_blocks.10.norm1.linear.weight_5_1_25_27_21_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1042521088, "file_name": "cache/pos_embedprojConv_282.const", "file_size": 2691072 }, "/transformer_blocks.10/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1045212160, "file_name": "cache/pos_embedprojConv_283.const", "file_size": 3072 }, "/transformer_blocks.10/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1045215232, "file_name": "cache/pos_embedprojConv_284.const", "file_size": 3072 }, "transformer_blocks.10.norm1.linear.weight_5_1_25_27_21_3_transformer_blocks.10.norm1.linear.weight_5_1_25_27_21_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1045218304, "file_name": "cache/pos_embedprojConv_285.const", "file_size": 5382144 }, "onnx::MatMul_4051": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1050600448, "file_name": "cache/pos_embedprojConv_286.const", "file_size": 10764288 }, "onnx::MatMul_4052": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1061364736, "file_name": "cache/pos_embedprojConv_287.const", "file_size": 10764288 }, "transformer_blocks.10.norm1.linear.weight_5_1_25_27_21_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1072129024, "file_name": "cache/pos_embedprojConv_288.const", "file_size": 2691072 }, "onnx::MatMul_4084": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1074820096, "file_name": "cache/pos_embedprojConv_289.const", "file_size": 2691072 }, "onnx::MatMul_4050": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1077511168, "file_name": "cache/pos_embedprojConv_290.const", "file_size": 2691072 }, "transformer_blocks.10.norm1_context.linear.weight_5_1_24_27_20_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1080202240, "file_name": "cache/pos_embedprojConv_291.const", "file_size": 2691072 }, "/transformer_blocks.10/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1082893312, "file_name": "cache/pos_embedprojConv_292.const", "file_size": 3072 }, "/transformer_blocks.10/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1082896384, "file_name": "cache/pos_embedprojConv_293.const", "file_size": 3072 }, "transformer_blocks.10.norm1_context.linear.weight_5_1_24_27_20_3_transformer_blocks.10.norm1_context.linear.weight_5_1_24_27_20_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1082899456, "file_name": "cache/pos_embedprojConv_294.const", "file_size": 5382144 }, "onnx::MatMul_4053": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1088281600, "file_name": "cache/pos_embedprojConv_295.const", "file_size": 10764288 }, "onnx::MatMul_4054": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1099045888, "file_name": "cache/pos_embedprojConv_296.const", "file_size": 10764288 }, "transformer_blocks.10.norm1_context.linear.weight_5_1_24_27_20_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1109810176, "file_name": "cache/pos_embedprojConv_297.const", "file_size": 2691072 }, "/transformer_blocks.11/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1112501248, "file_name": "cache/pos_embedprojConv_298.const", "file_size": 3072 }, "/transformer_blocks.11/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1112504320, "file_name": "cache/pos_embedprojConv_299.const", "file_size": 3072 }, "transformer_blocks.11.norm1.linear.weight_5_1_27_27_23_0_transformer_blocks.11.norm1.linear.weight_5_1_27_27_23_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1112507392, "file_name": "cache/pos_embedprojConv_300.const", "file_size": 5382144 }, "/transformer_blocks.11/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1117889536, "file_name": "cache/pos_embedprojConv_301.const", "file_size": 3072 }, "/transformer_blocks.11/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1117892608, "file_name": "cache/pos_embedprojConv_302.const", "file_size": 3072 }, "transformer_blocks.11.norm1_context.linear.weight_5_1_26_27_22_0_transformer_blocks.11.norm1_context.linear.weight_5_1_26_27_22_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1117895680, "file_name": "cache/pos_embedprojConv_303.const", "file_size": 5382144 }, "onnx::MatMul_4058_onnx::MatMul_4055": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1123277824, "file_name": "cache/pos_embedprojConv_304.const", "file_size": 5382144 }, "onnx::MatMul_4059_onnx::MatMul_4056": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1128659968, "file_name": "cache/pos_embedprojConv_305.const", "file_size": 5382144 }, "onnx::MatMul_4060_onnx::MatMul_4057": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1134042112, "file_name": "cache/pos_embedprojConv_306.const", "file_size": 5382144 }, "onnx::MatMul_4071": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1139424256, "file_name": "cache/pos_embedprojConv_307.const", "file_size": 2691072 }, "transformer_blocks.11.norm1.linear.weight_5_1_27_27_23_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1142115328, "file_name": "cache/pos_embedprojConv_308.const", "file_size": 2691072 }, "/transformer_blocks.11/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1144806400, "file_name": "cache/pos_embedprojConv_309.const", "file_size": 3072 }, "/transformer_blocks.11/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1144809472, "file_name": "cache/pos_embedprojConv_310.const", "file_size": 3072 }, "transformer_blocks.11.norm1.linear.weight_5_1_27_27_23_3_transformer_blocks.11.norm1.linear.weight_5_1_27_27_23_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 1144812544, "file_name": "cache/pos_embedprojConv_311.const", "file_size": 5382144 }, "onnx::MatMul_4072": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1150194688, "file_name": "cache/pos_embedprojConv_312.const", "file_size": 10764288 }, "onnx::MatMul_4073": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 1160958976, "file_name": "cache/pos_embedprojConv_313.const", "file_size": 10764288 }, "transformer_blocks.11.norm1.linear.weight_5_1_27_27_23_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1171723264, "file_name": "cache/pos_embedprojConv_314.const", "file_size": 2691072 }, "onnx::MatMul_4085": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 1174414336, "file_name": "cache/pos_embedprojConv_315.const", "file_size": 2691072 } }, "dynamic_shape_subgraph": true, "dynamic_shape_list": [ { "floor(w/2)": 32, "h": 64, "max_length": 160, "w": 64, "max_length + floor(h/2)*floor(w/2)": 1184, "batch_size": 2, "floor(h/2)": 32, "floor(h/2)*floor(w/2)": 1024 }, { "floor(w/2)": 48, "h": 64, "max_length": 160, "w": 96, "max_length + floor(h/2)*floor(w/2)": 1696, "batch_size": 2, "floor(h/2)": 32, "floor(h/2)*floor(w/2)": 1536 }, { "floor(w/2)": 32, "h": 96, "max_length": 160, "w": 64, "max_length + floor(h/2)*floor(w/2)": 1696, "batch_size": 2, "floor(h/2)": 48, "floor(h/2)*floor(w/2)": 1536 }, { "floor(w/2)": 64, "h": 72, "max_length": 160, "w": 128, "max_length + floor(h/2)*floor(w/2)": 2464, "batch_size": 2, "floor(h/2)": 36, "floor(h/2)*floor(w/2)": 2304 }, { "floor(w/2)": 36, "h": 128, "max_length": 160, "w": 72, "max_length + floor(h/2)*floor(w/2)": 2464, "batch_size": 2, "floor(h/2)": 64, "floor(h/2)*floor(w/2)": 2304 }, { "floor(w/2)": 64, "h": 96, "max_length": 160, "w": 128, "max_length + floor(h/2)*floor(w/2)": 3232, "batch_size": 2, "floor(h/2)": 48, "floor(h/2)*floor(w/2)": 3072 }, { "floor(w/2)": 48, "h": 128, "max_length": 160, "w": 96, "max_length + floor(h/2)*floor(w/2)": 3232, "batch_size": 2, "floor(h/2)": 64, "floor(h/2)*floor(w/2)": 3072 }, { "floor(w/2)": 64, "h": 128, "max_length": 160, "w": 128, "max_length + floor(h/2)*floor(w/2)": 4256, "batch_size": 2, "floor(h/2)": 64, "floor(h/2)*floor(w/2)": 4096 } ], "aux_info": {} }