{ "dd_meta_major_version": 1, "dd_meta_minor_version": 4, "state_table_updates": [], "op_list": [ { "name": "/pos_embed/proj/Conv", "type": "SDConv", "in_args": [ "hidden_states_nhwc.out5_0_0" ], "const_args": [ "pos_embed.proj.weight" ], "out_args": [ "/pos_embed/Transpose_output_0.out5_0_0" ], "attrs": { "auto_pad": { "type": "str", "value": [ "NOTSET" ] }, "dilations": { "type": "int", "value": [ "1", "1" ] }, "group": { "type": "int", "value": [ "1" ] }, "kernel_shape": { "type": "int", "value": [ "2", "2" ] }, "pads": { "type": "int", "value": [ "0", "0", "0", "0" ] }, "strides": { "type": "int", "value": [ "2", "2" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "w", "h", "16" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(w/2)", "floor(h/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "2", "2", "16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "float" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/pos_embed/Add_2", "type": "SDAdd", "in_args": [ "/pos_embed/Transpose_output_0.out5_0_0", "/pos_embed/Reshape_1_output_0.out_35_1_2" ], "const_args": [], "out_args": [ "/pos_embed/Add_2_output_0.out_35_1_2" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "b_shape": { "type": "str", "value": [ "1", "floor(h/2)*floor(w/2)", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/pos_embed_input/proj/Conv", "type": "SDConv", "in_args": [ "controlnet_cond_nhwc.out5_0_1" ], "const_args": [ "pos_embed_input.proj.weight" ], "out_args": [ "/pos_embed_input/Transpose_output_0.out5_0_1" ], "attrs": { "auto_pad": { "type": "str", "value": [ "NOTSET" ] }, "dilations": { "type": "int", "value": [ "1", "1" ] }, "group": { "type": "int", "value": [ "1" ] }, "kernel_shape": { "type": "int", "value": [ "2", "2" ] }, "pads": { "type": "int", "value": [ "0", "0", "0", "0" ] }, "strides": { "type": "int", "value": [ "2", "2" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "w", "h", "16" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(w/2)", "floor(h/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "2", "2", "16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "float" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/Add", "type": "SDAdd", "in_args": [ "/pos_embed/Add_2_output_0.out_35_1_2", "/pos_embed_input/Transpose_output_0.out5_0_1" ], "const_args": [], "out_args": [ "/Add_output_0.out_35_1_3" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/timestep_embedder/linear_1/Gemm", "type": "SDGemm", "in_args": [ "/time_text_embed/Cast_output_0.out17_3_3" ], "const_args": [ "time_text_embed.timestep_embedder.linear_1.weight_5_1_2" ], "out_args": [ "/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1", "256" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "256", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/timestep_embedder/act/Sigmoid", "type": "SDSilu", "in_args": [ "/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3" ], "const_args": [ "/time_text_embed/timestep_embedder/act/Sigmoid.weights2_1" ], "out_args": [ "/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "weight_shape": { "type": "int", "value": [ "128" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/timestep_embedder/linear_2/Gemm", "type": "SDGemm", "in_args": [ "/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1" ], "const_args": [ "time_text_embed.timestep_embedder.linear_2.weight_5_1_3" ], "out_args": [ "/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/text_embedder/linear_1/Gemm", "type": "SDGemm", "in_args": [ "pooled_projections.out17_3_1" ], "const_args": [ "time_text_embed.text_embedder.linear_1.weight_5_1_0" ], "out_args": [ "/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1", "2048" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "2048", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/text_embedder/act_1/Sigmoid", "type": "SDSilu", "in_args": [ "/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1" ], "const_args": [ "/time_text_embed/text_embedder/act_1/Sigmoid.weights2_0" ], "out_args": [ "/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "weight_shape": { "type": "int", "value": [ "128" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/text_embedder/linear_2/Gemm", "type": "SDGemm", "in_args": [ "/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0" ], "const_args": [ "time_text_embed.text_embedder.linear_2.weight_5_1_1" ], "out_args": [ "/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/time_text_embed/Add", "type": "SDAdd", "in_args": [ "/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4", "/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2" ], "const_args": [], "out_args": [ "/time_text_embed/Add_output_0.out_35_1_4" ], "attrs": { "a_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "b_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "c_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "is_bias_add": { "type": "int", "value": [ "0" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1/silu/Sigmoid", "type": "SDSilu", "in_args": [ "/time_text_embed/Add_output_0.out_35_1_4" ], "const_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid.weights2_2" ], "out_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "weight_shape": { "type": "int", "value": [ "128" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "encoder_hidden_states.out17_3_0_SDCastBf2Bfp", "type": "SDCastBf2Bfp", "in_args": [ "encoder_hidden_states.out17_3_0" ], "const_args": [ "encoder_hidden_states.out17_3_0_bfp.wts" ], "out_args": [ "encoder_hidden_states.out17_3_0_bfp.out25_0" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "4096" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "4096" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/context_embedder/MatMul", "type": "SDGemm_bfp", "in_args": [ "encoder_hidden_states.out17_3_0_bfp.out25_0" ], "const_args": [ "onnx::MatMul_1943" ], "out_args": [ "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "4096" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "4096", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_SDCastBfp2Bf", "type": "SDCastBfp2Bf", "in_args": [ "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0" ], "const_args": [ "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_bfp.wts" ], "out_args": [ "/context_embedder/Add_output_0.out17_3_0" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/context_embedder/Add_output_0.out17_3_0" ], "const_args": [ "/transformer_blocks.0/norm1_context/norm/Constant_output_0", "/transformer_blocks.0/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0" ], "const_args": [ "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_1_transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_0" ], "out_args": [ "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/Add_output_0.out_35_1_3" ], "const_args": [ "/transformer_blocks.0/norm1/norm/Constant_output_0", "/transformer_blocks.0/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1" ], "const_args": [ "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_4_transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_3" ], "out_args": [ "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_22" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0", "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_22" ], "const_args": [ "onnx::MatMul_1947_onnx::MatMul_1944" ], "out_args": [ "/transformer_blocks.0/attn/Concat_output_0.out22_0_bfp.out23_0" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0", "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_22" ], "const_args": [ "onnx::MatMul_1948_onnx::MatMul_1945" ], "out_args": [ "/transformer_blocks.0/attn/Concat_1_output_0.out22_0_bfp.out23_1" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0", "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_22" ], "const_args": [ "onnx::MatMul_1949_onnx::MatMul_1946" ], "out_args": [ "/transformer_blocks.0/attn/Concat_2_output_0.out22_0_bfp.out23_2" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/MatMulmha_18_0_0", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.0/attn/Concat_output_0.out22_0_bfp.out23_0", "/transformer_blocks.0/attn/Concat_1_output_0.out22_0_bfp.out23_1", "/transformer_blocks.0/attn/Concat_2_output_0.out22_0_bfp.out23_2" ], "const_args": [], "out_args": [ "/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "64", "max_length + floor(h/2)*floor(w/2)" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0" ], "const_args": [ "onnx::MatMul_1964" ], "out_args": [ "/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1", "/context_embedder/Add_output_0.out17_3_0" ], "const_args": [ "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_4_gma" ], "out_args": [ "/transformer_blocks.0/Add_4_output_0.out10_0" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.0/Add_4_output_0.out10_0" ], "const_args": [ "/transformer_blocks.0/norm2_context/Constant_output_0", "/transformer_blocks.0/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3" ], "const_args": [ "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_3_transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_2" ], "out_args": [ "/transformer_blocks.0/Add_6_output_0.out0_0_3_bfp.out1_1" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0" ], "const_args": [ "onnx::MatMul_1963" ], "out_args": [ "/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.0/Add_6_output_0.out0_0_3_bfp.out1_1" ], "const_args": [ "onnx::MatMul_1967" ], "out_args": [ "/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_6" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_6" ], "const_args": [ "onnx::MatMul_1968" ], "out_args": [ "/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_12_bfp.out25_8" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_12_bfp.out25_8", "/transformer_blocks.0/Add_4_output_0.out10_0" ], "const_args": [ "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_5_gma" ], "out_args": [ "/transformer_blocks.0/Add_7_output_0.out10_1" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0", "/Add_output_0.out_35_1_3" ], "const_args": [ "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_2_gma" ], "out_args": [ "/transformer_blocks.0/Add_output_0.out10_20" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.0/Add_output_0.out10_20" ], "const_args": [ "/transformer_blocks.0/norm2/Constant_output_0", "/transformer_blocks.0/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2" ], "const_args": [ "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_1_transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_0" ], "out_args": [ "/transformer_blocks.0/Add_2_output_0.out0_0_2_bfp.out1_21" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.0/Add_2_output_0.out0_0_2_bfp.out1_21" ], "const_args": [ "onnx::MatMul_1965" ], "out_args": [ "/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_5" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_5" ], "const_args": [ "onnx::MatMul_1966" ], "out_args": [ "/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_11_bfp.out25_7" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.0/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_11_bfp.out25_7", "/transformer_blocks.0/Add_output_0.out10_20" ], "const_args": [ "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_5_gma" ], "out_args": [ "/transformer_blocks.0/Add_3_output_0.out10_21" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.0/Add_3_output_0.out10_21" ], "const_args": [ "/transformer_blocks.1/norm1/norm/Constant_output_0", "/transformer_blocks.1/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_4_bfp.out15_4" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_4_bfp.out15_4" ], "const_args": [ "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_0_transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_1" ], "out_args": [ "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/controlnet_blocks.0/MatMul", "type": "SDGemm", "in_args": [ "/transformer_blocks.0/Add_3_output_0.out10_21" ], "const_args": [ "onnx::MatMul_2088" ], "out_args": [ "/controlnet_blocks.0/Add_output_0.out17_3_13" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.0/Add_7_output_0.out10_1" ], "const_args": [ "/transformer_blocks.1/norm1_context/norm/Constant_output_0", "/transformer_blocks.1/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_5_bfp.out15_5" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_5_bfp.out15_5" ], "const_args": [ "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_0_transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_1" ], "out_args": [ "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4", "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2" ], "const_args": [ "onnx::MatMul_1972_onnx::MatMul_1969" ], "out_args": [ "/transformer_blocks.1/attn/Concat_output_0.out22_1_bfp.out23_3" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4", "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2" ], "const_args": [ "onnx::MatMul_1973_onnx::MatMul_1970" ], "out_args": [ "/transformer_blocks.1/attn/Concat_1_output_0.out22_1_bfp.out23_4" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4", "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2" ], "const_args": [ "onnx::MatMul_1974_onnx::MatMul_1971" ], "out_args": [ "/transformer_blocks.1/attn/Concat_2_output_0.out22_1_bfp.out23_5" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/MatMulmha_18_0_1", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.1/attn/Concat_output_0.out22_1_bfp.out23_3", "/transformer_blocks.1/attn/Concat_1_output_0.out22_1_bfp.out23_4", "/transformer_blocks.1/attn/Concat_2_output_0.out22_1_bfp.out23_5" ], "const_args": [], "out_args": [ "/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "64", "max_length + floor(h/2)*floor(w/2)" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1" ], "const_args": [ "onnx::MatMul_1988" ], "out_args": [ "/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2", "/transformer_blocks.0/Add_3_output_0.out10_21" ], "const_args": [ "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_2_gma" ], "out_args": [ "/transformer_blocks.1/Add_output_0.out10_2" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.1/Add_output_0.out10_2" ], "const_args": [ "/transformer_blocks.1/norm2/Constant_output_0", "/transformer_blocks.1/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6" ], "const_args": [ "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_3_transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_4" ], "out_args": [ "/transformer_blocks.1/Add_2_output_0.out0_0_6_bfp.out1_3" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.1/Add_2_output_0.out0_0_6_bfp.out1_3" ], "const_args": [ "onnx::MatMul_1990" ], "out_args": [ "/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_10" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_10" ], "const_args": [ "onnx::MatMul_1991" ], "out_args": [ "/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_20_bfp.out25_12" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_20_bfp.out25_12", "/transformer_blocks.1/Add_output_0.out10_2" ], "const_args": [ "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_5_gma" ], "out_args": [ "/transformer_blocks.1/Add_3_output_0.out10_3" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/controlnet_blocks.1/MatMul", "type": "SDGemm", "in_args": [ "/transformer_blocks.1/Add_3_output_0.out10_3" ], "const_args": [ "onnx::MatMul_2089" ], "out_args": [ "/controlnet_blocks.1/Add_output_0.out17_3_22" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1" ], "const_args": [ "onnx::MatMul_1989" ], "out_args": [ "/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3", "/transformer_blocks.0/Add_7_output_0.out10_1" ], "const_args": [ "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_2_gma" ], "out_args": [ "/transformer_blocks.1/Add_4_output_0.out10_4" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.1/Add_4_output_0.out10_4" ], "const_args": [ "/transformer_blocks.1/norm2_context/Constant_output_0", "/transformer_blocks.1/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7" ], "const_args": [ "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_3_transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_4" ], "out_args": [ "/transformer_blocks.1/Add_6_output_0.out0_0_7_bfp.out1_5" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.1/Add_6_output_0.out0_0_7_bfp.out1_5" ], "const_args": [ "onnx::MatMul_1992" ], "out_args": [ "/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_11" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_11" ], "const_args": [ "onnx::MatMul_1993" ], "out_args": [ "/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_21_bfp.out25_13" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.1/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_21_bfp.out25_13", "/transformer_blocks.1/Add_4_output_0.out10_4" ], "const_args": [ "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_5_gma" ], "out_args": [ "/transformer_blocks.1/Add_7_output_0.out10_5" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.1/Add_3_output_0.out10_3" ], "const_args": [ "/transformer_blocks.2/norm1/norm/Constant_output_0", "/transformer_blocks.2/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_8_bfp.out15_8" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_8_bfp.out15_8" ], "const_args": [ "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_0_transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_1" ], "out_args": [ "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_6" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.1/Add_7_output_0.out10_5" ], "const_args": [ "/transformer_blocks.2/norm1_context/norm/Constant_output_0", "/transformer_blocks.2/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_9_bfp.out15_9" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_9_bfp.out15_9" ], "const_args": [ "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_0_transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_1" ], "out_args": [ "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_8" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_8", "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_6" ], "const_args": [ "onnx::MatMul_1997_onnx::MatMul_1994" ], "out_args": [ "/transformer_blocks.2/attn/Concat_output_0.out22_2_bfp.out23_6" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_8", "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_6" ], "const_args": [ "onnx::MatMul_1998_onnx::MatMul_1995" ], "out_args": [ "/transformer_blocks.2/attn/Concat_1_output_0.out22_2_bfp.out23_7" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_8", "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_6" ], "const_args": [ "onnx::MatMul_1999_onnx::MatMul_1996" ], "out_args": [ "/transformer_blocks.2/attn/Concat_2_output_0.out22_2_bfp.out23_8" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/MatMulmha_18_0_2", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.2/attn/Concat_output_0.out22_2_bfp.out23_6", "/transformer_blocks.2/attn/Concat_1_output_0.out22_2_bfp.out23_7", "/transformer_blocks.2/attn/Concat_2_output_0.out22_2_bfp.out23_8" ], "const_args": [], "out_args": [ "/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "64", "max_length + floor(h/2)*floor(w/2)" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2" ], "const_args": [ "onnx::MatMul_2013" ], "out_args": [ "/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4", "/transformer_blocks.1/Add_3_output_0.out10_3" ], "const_args": [ "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_2_gma" ], "out_args": [ "/transformer_blocks.2/Add_output_0.out10_6" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.2/Add_output_0.out10_6" ], "const_args": [ "/transformer_blocks.2/norm2/Constant_output_0", "/transformer_blocks.2/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10" ], "const_args": [ "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_3_transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_4" ], "out_args": [ "/transformer_blocks.2/Add_2_output_0.out0_0_10_bfp.out1_7" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.2/Add_2_output_0.out0_0_10_bfp.out1_7" ], "const_args": [ "onnx::MatMul_2015" ], "out_args": [ "/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_15" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_15" ], "const_args": [ "onnx::MatMul_2016" ], "out_args": [ "/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_29_bfp.out25_17" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_29_bfp.out25_17", "/transformer_blocks.2/Add_output_0.out10_6" ], "const_args": [ "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_5_gma" ], "out_args": [ "/transformer_blocks.2/Add_3_output_0.out10_7" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/controlnet_blocks.2/MatMul", "type": "SDGemm", "in_args": [ "/transformer_blocks.2/Add_3_output_0.out10_7" ], "const_args": [ "onnx::MatMul_2090" ], "out_args": [ "/controlnet_blocks.2/Add_output_0.out17_3_31" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2" ], "const_args": [ "onnx::MatMul_2014" ], "out_args": [ "/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5", "/transformer_blocks.1/Add_7_output_0.out10_5" ], "const_args": [ "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_2_gma" ], "out_args": [ "/transformer_blocks.2/Add_4_output_0.out10_8" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.2/Add_4_output_0.out10_8" ], "const_args": [ "/transformer_blocks.2/norm2_context/Constant_output_0", "/transformer_blocks.2/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11" ], "const_args": [ "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_3_transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_4" ], "out_args": [ "/transformer_blocks.2/Add_6_output_0.out0_0_11_bfp.out1_9" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.2/Add_6_output_0.out0_0_11_bfp.out1_9" ], "const_args": [ "onnx::MatMul_2017" ], "out_args": [ "/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_16" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_16" ], "const_args": [ "onnx::MatMul_2018" ], "out_args": [ "/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_30_bfp.out25_18" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.2/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_30_bfp.out25_18", "/transformer_blocks.2/Add_4_output_0.out10_8" ], "const_args": [ "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_5_gma" ], "out_args": [ "/transformer_blocks.2/Add_7_output_0.out10_9" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.2/Add_3_output_0.out10_7" ], "const_args": [ "/transformer_blocks.3/norm1/norm/Constant_output_0", "/transformer_blocks.3/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_12_bfp.out15_12" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_12_bfp.out15_12" ], "const_args": [ "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_0_transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_1" ], "out_args": [ "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_10" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.2/Add_7_output_0.out10_9" ], "const_args": [ "/transformer_blocks.3/norm1_context/norm/Constant_output_0", "/transformer_blocks.3/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_13_bfp.out15_13" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_13_bfp.out15_13" ], "const_args": [ "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_0_transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_1" ], "out_args": [ "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_12" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_12", "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_10" ], "const_args": [ "onnx::MatMul_2022_onnx::MatMul_2019" ], "out_args": [ "/transformer_blocks.3/attn/Concat_output_0.out22_3_bfp.out23_9" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_12", "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_10" ], "const_args": [ "onnx::MatMul_2023_onnx::MatMul_2020" ], "out_args": [ "/transformer_blocks.3/attn/Concat_1_output_0.out22_3_bfp.out23_10" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_12", "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_10" ], "const_args": [ "onnx::MatMul_2024_onnx::MatMul_2021" ], "out_args": [ "/transformer_blocks.3/attn/Concat_2_output_0.out22_3_bfp.out23_11" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/MatMulmha_18_0_3", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.3/attn/Concat_output_0.out22_3_bfp.out23_9", "/transformer_blocks.3/attn/Concat_1_output_0.out22_3_bfp.out23_10", "/transformer_blocks.3/attn/Concat_2_output_0.out22_3_bfp.out23_11" ], "const_args": [], "out_args": [ "/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "64", "max_length + floor(h/2)*floor(w/2)" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3" ], "const_args": [ "onnx::MatMul_2038" ], "out_args": [ "/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6", "/transformer_blocks.2/Add_3_output_0.out10_7" ], "const_args": [ "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_2_gma" ], "out_args": [ "/transformer_blocks.3/Add_output_0.out10_10" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.3/Add_output_0.out10_10" ], "const_args": [ "/transformer_blocks.3/norm2/Constant_output_0", "/transformer_blocks.3/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14" ], "const_args": [ "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_3_transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_4" ], "out_args": [ "/transformer_blocks.3/Add_2_output_0.out0_0_14_bfp.out1_11" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.3/Add_2_output_0.out0_0_14_bfp.out1_11" ], "const_args": [ "onnx::MatMul_2040" ], "out_args": [ "/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_20" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_20" ], "const_args": [ "onnx::MatMul_2041" ], "out_args": [ "/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_38_bfp.out25_22" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_38_bfp.out25_22", "/transformer_blocks.3/Add_output_0.out10_10" ], "const_args": [ "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_5_gma" ], "out_args": [ "/transformer_blocks.3/Add_3_output_0.out10_11" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/controlnet_blocks.3/MatMul", "type": "SDGemm", "in_args": [ "/transformer_blocks.3/Add_3_output_0.out10_11" ], "const_args": [ "onnx::MatMul_2091" ], "out_args": [ "/controlnet_blocks.3/Add_output_0.out17_3_40" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3" ], "const_args": [ "onnx::MatMul_2039" ], "out_args": [ "/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7", "/transformer_blocks.2/Add_7_output_0.out10_9" ], "const_args": [ "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_2_gma" ], "out_args": [ "/transformer_blocks.3/Add_4_output_0.out10_12" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.3/Add_4_output_0.out10_12" ], "const_args": [ "/transformer_blocks.3/norm2_context/Constant_output_0", "/transformer_blocks.3/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15" ], "const_args": [ "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_3_transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_4" ], "out_args": [ "/transformer_blocks.3/Add_6_output_0.out0_0_15_bfp.out1_13" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.3/Add_6_output_0.out0_0_15_bfp.out1_13" ], "const_args": [ "onnx::MatMul_2042" ], "out_args": [ "/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_21" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_21" ], "const_args": [ "onnx::MatMul_2043" ], "out_args": [ "/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_39_bfp.out25_23" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.3/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_39_bfp.out25_23", "/transformer_blocks.3/Add_4_output_0.out10_12" ], "const_args": [ "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_5_gma" ], "out_args": [ "/transformer_blocks.3/Add_7_output_0.out10_13" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.3/Add_3_output_0.out10_11" ], "const_args": [ "/transformer_blocks.4/norm1/norm/Constant_output_0", "/transformer_blocks.4/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_16_bfp.out15_16" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_16_bfp.out15_16" ], "const_args": [ "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_0_transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_1" ], "out_args": [ "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_14" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.3/Add_7_output_0.out10_13" ], "const_args": [ "/transformer_blocks.4/norm1_context/norm/Constant_output_0", "/transformer_blocks.4/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_17_bfp.out15_17" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_17_bfp.out15_17" ], "const_args": [ "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_0_transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_1" ], "out_args": [ "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_16" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_16", "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_14" ], "const_args": [ "onnx::MatMul_2047_onnx::MatMul_2044" ], "out_args": [ "/transformer_blocks.4/attn/Concat_output_0.out22_4_bfp.out23_12" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_16", "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_14" ], "const_args": [ "onnx::MatMul_2048_onnx::MatMul_2045" ], "out_args": [ "/transformer_blocks.4/attn/Concat_1_output_0.out22_4_bfp.out23_13" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_16", "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_14" ], "const_args": [ "onnx::MatMul_2049_onnx::MatMul_2046" ], "out_args": [ "/transformer_blocks.4/attn/Concat_2_output_0.out22_4_bfp.out23_14" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/MatMulmha_18_0_4", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.4/attn/Concat_output_0.out22_4_bfp.out23_12", "/transformer_blocks.4/attn/Concat_1_output_0.out22_4_bfp.out23_13", "/transformer_blocks.4/attn/Concat_2_output_0.out22_4_bfp.out23_14" ], "const_args": [], "out_args": [ "/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "64", "max_length + floor(h/2)*floor(w/2)" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4" ], "const_args": [ "onnx::MatMul_2063" ], "out_args": [ "/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8", "/transformer_blocks.3/Add_3_output_0.out10_11" ], "const_args": [ "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_2_gma" ], "out_args": [ "/transformer_blocks.4/Add_output_0.out10_14" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.4/Add_output_0.out10_14" ], "const_args": [ "/transformer_blocks.4/norm2/Constant_output_0", "/transformer_blocks.4/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18" ], "const_args": [ "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_3_transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_4" ], "out_args": [ "/transformer_blocks.4/Add_2_output_0.out0_0_18_bfp.out1_15" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.4/Add_2_output_0.out0_0_18_bfp.out1_15" ], "const_args": [ "onnx::MatMul_2065" ], "out_args": [ "/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_25" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_25" ], "const_args": [ "onnx::MatMul_2066" ], "out_args": [ "/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_47_bfp.out25_27" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_47_bfp.out25_27", "/transformer_blocks.4/Add_output_0.out10_14" ], "const_args": [ "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_5_gma" ], "out_args": [ "/transformer_blocks.4/Add_3_output_0.out10_15" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/controlnet_blocks.4/MatMul", "type": "SDGemm", "in_args": [ "/transformer_blocks.4/Add_3_output_0.out10_15" ], "const_args": [ "onnx::MatMul_2092" ], "out_args": [ "/controlnet_blocks.4/Add_output_0.out17_3_49" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/attn/to_add_out/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4" ], "const_args": [ "onnx::MatMul_2064" ], "out_args": [ "/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "end": { "type": "int", "value": [ "2147483647" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_4", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9", "/transformer_blocks.3/Add_7_output_0.out10_13" ], "const_args": [ "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_2_gma" ], "out_args": [ "/transformer_blocks.4/Add_4_output_0.out10_16" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/norm2_context/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.4/Add_4_output_0.out10_16" ], "const_args": [ "/transformer_blocks.4/norm2_context/Constant_output_0", "/transformer_blocks.4/norm2_context/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_6", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19" ], "const_args": [ "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_3_transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_4" ], "out_args": [ "/transformer_blocks.4/Add_6_output_0.out0_0_19_bfp.out1_17" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/ff_context/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.4/Add_6_output_0.out0_0_19_bfp.out1_17" ], "const_args": [ "onnx::MatMul_2067" ], "out_args": [ "/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_26" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/ff_context/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_26" ], "const_args": [ "onnx::MatMul_2068" ], "out_args": [ "/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_48_bfp.out25_28" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.4/Add_7", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_48_bfp.out25_28", "/transformer_blocks.4/Add_4_output_0.out10_16" ], "const_args": [ "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_5_gma" ], "out_args": [ "/transformer_blocks.4/Add_7_output_0.out10_17" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm1/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.4/Add_3_output_0.out10_15" ], "const_args": [ "/transformer_blocks.5/norm1/norm/Constant_output_0", "/transformer_blocks.5/norm1/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_20_bfp.out15_20" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm1/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_20_bfp.out15_20" ], "const_args": [ "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_0_transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_1" ], "out_args": [ "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_18" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm1_context/norm/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.4/Add_7_output_0.out10_17" ], "const_args": [ "/transformer_blocks.5/norm1_context/norm/Constant_output_0", "/transformer_blocks.5/norm1_context/norm/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_21_bfp.out15_21" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm1_context/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_21_bfp.out15_21" ], "const_args": [ "transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_0_transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_1" ], "out_args": [ "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_20" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/Concat", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_20", "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_18" ], "const_args": [ "onnx::MatMul_2072_onnx::MatMul_2069" ], "out_args": [ "/transformer_blocks.5/attn/Concat_output_0.out22_5_bfp.out23_15" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/Concat_1", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_20", "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_18" ], "const_args": [ "onnx::MatMul_2073_onnx::MatMul_2070" ], "out_args": [ "/transformer_blocks.5/attn/Concat_1_output_0.out22_5_bfp.out23_16" ], "attrs": { "trans_head": { "type": "int", "value": [ "1" ] }, "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/Concat_2", "type": "SDGemmConcat_bfp", "in_args": [ "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_20", "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_18" ], "const_args": [ "onnx::MatMul_2074_onnx::MatMul_2071" ], "out_args": [ "/transformer_blocks.5/attn/Concat_2_output_0.out22_5_bfp.out23_17" ], "attrs": { "head_num": { "type": "int", "value": [ "24" ] }, "concat_axis": { "type": "int", "value": [ "1" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "input_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape_0": { "type": "str", "value": [ "batch_size", "max_length", "1536" ] }, "output_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "trans_head": { "type": "int", "value": [ "3" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/MatMulmha_18_0_5", "type": "SDMHA_bfp", "in_args": [ "/transformer_blocks.5/attn/Concat_output_0.out22_5_bfp.out23_15", "/transformer_blocks.5/attn/Concat_1_output_0.out22_5_bfp.out23_16", "/transformer_blocks.5/attn/Concat_2_output_0.out22_5_bfp.out23_17" ], "const_args": [], "out_args": [ "/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5" ], "attrs": { "num_heads": { "type": "int", "value": [ "24" ] }, "unidirectional": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "64", "max_length + floor(h/2)*floor(w/2)" ] }, "op_version": { "type": "str", "value": [ "v2" ] }, "is_flash_mha": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/attn/to_out.0/MatMul", "type": "SDSliceGemm_bfp", "in_args": [ "/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5" ], "const_args": [ "onnx::MatMul_2085" ], "out_args": [ "/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10" ], "attrs": { "axes": { "type": "int", "value": [ "1" ] }, "slice_shape": { "type": "str", "value": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "1536" ] }, "start": { "type": "int", "value": [ "0" ] }, "end": { "type": "str", "value": [ "floor(h/2)*floor(w/2)" ] }, "step": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10", "/transformer_blocks.4/Add_3_output_0.out10_15" ], "const_args": [ "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_2_gma" ], "out_args": [ "/transformer_blocks.5/Add_output_0.out10_18" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/norm2/LayerNormalization", "type": "SDLayerNorm_bfbfp", "in_args": [ "/transformer_blocks.5/Add_output_0.out10_18" ], "const_args": [ "/transformer_blocks.5/norm2/Constant_output_0", "/transformer_blocks.5/norm2/Constant_1_output_0" ], "out_args": [ "/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22" ], "attrs": { "axis": { "type": "int", "value": [ "-1" ] }, "epsilon": { "type": "float", "value": [ "9.999999974752427e-07" ] }, "stash_type": { "type": "int", "value": [ "1" ] }, "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "gamma_shape": { "type": "int", "value": [ "1536" ] }, "beta_shape": { "type": "int", "value": [ "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add_2", "type": "SDGemmGemmMulAdd_bfp", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22" ], "const_args": [ "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_3_transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_4" ], "out_args": [ "/transformer_blocks.5/Add_2_output_0.out0_0_22_bfp.out1_19" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "weight_shape_1": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/ff/net.0/proj/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.5/Add_2_output_0.out0_0_22_bfp.out1_19" ], "const_args": [ "onnx::MatMul_2086" ], "out_args": [ "/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_30" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "weight_shape": { "type": "int", "value": [ "1536", "6144" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "nonlinear": { "type": "str", "value": [ "Gelu" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/ff/net.2/MatMul", "type": "SDGemm_bfp", "in_args": [ "/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_30" ], "const_args": [ "onnx::MatMul_2087" ], "out_args": [ "/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_56_bfp.out25_31" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "6144" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "6144", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "out_dtypes": { "type": "str", "value": [ "bfp16ebs8" ] }, "in_dtypes": { "type": "str", "value": [ "bfp16ebs8", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/transformer_blocks.5/Add_3", "type": "SDGemmMulAdd_bfpbfbf", "in_args": [ "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_56_bfp.out25_31", "/transformer_blocks.5/Add_output_0.out10_18" ], "const_args": [ "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_5_gma" ], "out_args": [ "/transformer_blocks.5/Add_3_output_0.out10_19" ], "attrs": { "input_shape_0": { "type": "str", "value": [ "batch_size", "1", "1536" ] }, "input_shape_1": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "input_shape_2": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape_0": { "type": "int", "value": [ "1536", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16", "bfp16ebs8" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } }, { "name": "/controlnet_blocks.5/MatMul", "type": "SDGemm", "in_args": [ "/transformer_blocks.5/Add_3_output_0.out10_19" ], "const_args": [ "onnx::MatMul_2093" ], "out_args": [ "/controlnet_blocks.5/Add_output_0.out17_3_57" ], "attrs": { "input_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "output_shape": { "type": "str", "value": [ "batch_size", "floor(h/2)*floor(w/2)", "1536" ] }, "weight_shape": { "type": "int", "value": [ "1536", "1536" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] }, "bias_enable": { "type": "int", "value": [ "1" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfp16ebs8", "bfloat16" ] }, "ctrl_packet": { "type": "int", "value": [ "1" ] } } } ], "fused_tensors": { "in": { "buffer_size": 15936, "xrt_arg_id": 0, "packed_tensors": [ "hidden_states_nhwc.out5_0_0", "/pos_embed/Reshape_1_output_0.out_35_1_2", "controlnet_cond_nhwc.out5_0_1", "/time_text_embed/Cast_output_0.out17_3_3", "pooled_projections.out17_3_1", "encoder_hidden_states.out17_3_0" ] }, "out": { "buffer_size": 18432, "xrt_arg_id": 1, "packed_tensors": [ "/controlnet_blocks.0/Add_output_0.out17_3_13", "/controlnet_blocks.1/Add_output_0.out17_3_22", "/controlnet_blocks.2/Add_output_0.out17_3_31", "/controlnet_blocks.3/Add_output_0.out17_3_40", "/controlnet_blocks.4/Add_output_0.out17_3_49", "/controlnet_blocks.5/Add_output_0.out17_3_57" ] }, "scratch": { "buffer_size": 322048, "xrt_arg_id": 2, "packed_tensors": [ "/pos_embed/Transpose_output_0.out5_0_0", "/pos_embed/Add_2_output_0.out_35_1_2", "/pos_embed_input/Transpose_output_0.out5_0_1", "/Add_output_0.out_35_1_3", "/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3", "/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1", "/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4", "/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1", "/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0", "/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2", "/time_text_embed/Add_output_0.out_35_1_4", "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", "encoder_hidden_states.out17_3_0_bfp.out25_0", "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0", "/context_embedder/Add_output_0.out17_3_0", "/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0", "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0", "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1", "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_22", "/transformer_blocks.0/attn/Concat_output_0.out22_0_bfp.out23_0", "/transformer_blocks.0/attn/Concat_1_output_0.out22_0_bfp.out23_1", "/transformer_blocks.0/attn/Concat_2_output_0.out22_0_bfp.out23_2", "/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0", "/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1", "/transformer_blocks.0/Add_4_output_0.out10_0", "/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3", "/transformer_blocks.0/Add_6_output_0.out0_0_3_bfp.out1_1", "/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0", "/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_6", "/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_12_bfp.out25_8", "/transformer_blocks.0/Add_7_output_0.out10_1", "/transformer_blocks.0/Add_output_0.out10_20", "/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2", "/transformer_blocks.0/Add_2_output_0.out0_0_2_bfp.out1_21", "/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_5", "/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_11_bfp.out25_7", "/transformer_blocks.0/Add_3_output_0.out10_21", "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_4_bfp.out15_4", "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2", "/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_5_bfp.out15_5", "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4", "/transformer_blocks.1/attn/Concat_output_0.out22_1_bfp.out23_3", "/transformer_blocks.1/attn/Concat_1_output_0.out22_1_bfp.out23_4", "/transformer_blocks.1/attn/Concat_2_output_0.out22_1_bfp.out23_5", "/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1", "/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2", "/transformer_blocks.1/Add_output_0.out10_2", "/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6", "/transformer_blocks.1/Add_2_output_0.out0_0_6_bfp.out1_3", "/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_10", "/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_20_bfp.out25_12", "/transformer_blocks.1/Add_3_output_0.out10_3", "/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3", "/transformer_blocks.1/Add_4_output_0.out10_4", "/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7", "/transformer_blocks.1/Add_6_output_0.out0_0_7_bfp.out1_5", "/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_11", "/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_21_bfp.out25_13", "/transformer_blocks.1/Add_7_output_0.out10_5", "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_8_bfp.out15_8", "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_6", "/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_9_bfp.out15_9", "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_8", "/transformer_blocks.2/attn/Concat_output_0.out22_2_bfp.out23_6", "/transformer_blocks.2/attn/Concat_1_output_0.out22_2_bfp.out23_7", "/transformer_blocks.2/attn/Concat_2_output_0.out22_2_bfp.out23_8", "/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2", "/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4", "/transformer_blocks.2/Add_output_0.out10_6", "/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10", "/transformer_blocks.2/Add_2_output_0.out0_0_10_bfp.out1_7", "/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_15", "/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_29_bfp.out25_17", "/transformer_blocks.2/Add_3_output_0.out10_7", "/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5", "/transformer_blocks.2/Add_4_output_0.out10_8", "/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11", "/transformer_blocks.2/Add_6_output_0.out0_0_11_bfp.out1_9", "/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_16", "/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_30_bfp.out25_18", "/transformer_blocks.2/Add_7_output_0.out10_9", "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_12_bfp.out15_12", "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_10", "/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_13_bfp.out15_13", "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_12", "/transformer_blocks.3/attn/Concat_output_0.out22_3_bfp.out23_9", "/transformer_blocks.3/attn/Concat_1_output_0.out22_3_bfp.out23_10", "/transformer_blocks.3/attn/Concat_2_output_0.out22_3_bfp.out23_11", "/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3", "/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6", "/transformer_blocks.3/Add_output_0.out10_10", "/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14", "/transformer_blocks.3/Add_2_output_0.out0_0_14_bfp.out1_11", "/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_20", "/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_38_bfp.out25_22", "/transformer_blocks.3/Add_3_output_0.out10_11", "/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7", "/transformer_blocks.3/Add_4_output_0.out10_12", "/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15", "/transformer_blocks.3/Add_6_output_0.out0_0_15_bfp.out1_13", "/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_21", "/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_39_bfp.out25_23", "/transformer_blocks.3/Add_7_output_0.out10_13", "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_16_bfp.out15_16", "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_14", "/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_17_bfp.out15_17", "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_16", "/transformer_blocks.4/attn/Concat_output_0.out22_4_bfp.out23_12", "/transformer_blocks.4/attn/Concat_1_output_0.out22_4_bfp.out23_13", "/transformer_blocks.4/attn/Concat_2_output_0.out22_4_bfp.out23_14", "/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4", "/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8", "/transformer_blocks.4/Add_output_0.out10_14", "/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18", "/transformer_blocks.4/Add_2_output_0.out0_0_18_bfp.out1_15", "/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_25", "/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_47_bfp.out25_27", "/transformer_blocks.4/Add_3_output_0.out10_15", "/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9", "/transformer_blocks.4/Add_4_output_0.out10_16", "/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19", "/transformer_blocks.4/Add_6_output_0.out0_0_19_bfp.out1_17", "/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_26", "/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_48_bfp.out25_28", "/transformer_blocks.4/Add_7_output_0.out10_17", "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_20_bfp.out15_20", "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_18", "/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_21_bfp.out15_21", "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_20", "/transformer_blocks.5/attn/Concat_output_0.out22_5_bfp.out23_15", "/transformer_blocks.5/attn/Concat_1_output_0.out22_5_bfp.out23_16", "/transformer_blocks.5/attn/Concat_2_output_0.out22_5_bfp.out23_17", "/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5", "/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10", "/transformer_blocks.5/Add_output_0.out10_18", "/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22", "/transformer_blocks.5/Add_2_output_0.out0_0_22_bfp.out1_19", "/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_30", "/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_56_bfp.out25_31", "/transformer_blocks.5/Add_3_output_0.out10_19" ] }, "const": { "buffer_size": 579539968, "xrt_arg_id": 3, "packed_tensors": [ "pos_embed.proj.weight", "pos_embed_input.proj.weight", "time_text_embed.timestep_embedder.linear_1.weight_5_1_2", "/time_text_embed/timestep_embedder/act/Sigmoid.weights2_1", "time_text_embed.timestep_embedder.linear_2.weight_5_1_3", "time_text_embed.text_embedder.linear_1.weight_5_1_0", "/time_text_embed/text_embedder/act_1/Sigmoid.weights2_0", "time_text_embed.text_embedder.linear_2.weight_5_1_1", "/transformer_blocks.0/norm1/silu/Sigmoid.weights2_2", "encoder_hidden_states.out17_3_0_bfp.wts", "onnx::MatMul_1943", "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_bfp.wts", "/transformer_blocks.0/norm1_context/norm/Constant_output_0", "/transformer_blocks.0/norm1_context/norm/Constant_1_output_0", "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_1_transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_0", "/transformer_blocks.0/norm1/norm/Constant_output_0", "/transformer_blocks.0/norm1/norm/Constant_1_output_0", "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_4_transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_3", "onnx::MatMul_1947_onnx::MatMul_1944", "onnx::MatMul_1948_onnx::MatMul_1945", "onnx::MatMul_1949_onnx::MatMul_1946", "onnx::MatMul_1964", "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_4_gma", "/transformer_blocks.0/norm2_context/Constant_output_0", "/transformer_blocks.0/norm2_context/Constant_1_output_0", "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_3_transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_2", "onnx::MatMul_1963", "onnx::MatMul_1967", "onnx::MatMul_1968", "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_5_gma", "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_2_gma", "/transformer_blocks.0/norm2/Constant_output_0", "/transformer_blocks.0/norm2/Constant_1_output_0", "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_1_transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_0", "onnx::MatMul_1965", "onnx::MatMul_1966", "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_5_gma", "/transformer_blocks.1/norm1/norm/Constant_output_0", "/transformer_blocks.1/norm1/norm/Constant_1_output_0", "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_0_transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_1", "onnx::MatMul_2088", "/transformer_blocks.1/norm1_context/norm/Constant_output_0", "/transformer_blocks.1/norm1_context/norm/Constant_1_output_0", "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_0_transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_1", "onnx::MatMul_1972_onnx::MatMul_1969", "onnx::MatMul_1973_onnx::MatMul_1970", "onnx::MatMul_1974_onnx::MatMul_1971", "onnx::MatMul_1988", "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_2_gma", "/transformer_blocks.1/norm2/Constant_output_0", "/transformer_blocks.1/norm2/Constant_1_output_0", "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_3_transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_4", "onnx::MatMul_1990", "onnx::MatMul_1991", "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_5_gma", "onnx::MatMul_2089", "onnx::MatMul_1989", "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_2_gma", "/transformer_blocks.1/norm2_context/Constant_output_0", "/transformer_blocks.1/norm2_context/Constant_1_output_0", "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_3_transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_4", "onnx::MatMul_1992", "onnx::MatMul_1993", "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_5_gma", "/transformer_blocks.2/norm1/norm/Constant_output_0", "/transformer_blocks.2/norm1/norm/Constant_1_output_0", "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_0_transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_1", "/transformer_blocks.2/norm1_context/norm/Constant_output_0", "/transformer_blocks.2/norm1_context/norm/Constant_1_output_0", "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_0_transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_1", "onnx::MatMul_1997_onnx::MatMul_1994", "onnx::MatMul_1998_onnx::MatMul_1995", "onnx::MatMul_1999_onnx::MatMul_1996", "onnx::MatMul_2013", "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_2_gma", "/transformer_blocks.2/norm2/Constant_output_0", "/transformer_blocks.2/norm2/Constant_1_output_0", "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_3_transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_4", "onnx::MatMul_2015", "onnx::MatMul_2016", "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_5_gma", "onnx::MatMul_2090", "onnx::MatMul_2014", "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_2_gma", "/transformer_blocks.2/norm2_context/Constant_output_0", "/transformer_blocks.2/norm2_context/Constant_1_output_0", "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_3_transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_4", "onnx::MatMul_2017", "onnx::MatMul_2018", "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_5_gma", "/transformer_blocks.3/norm1/norm/Constant_output_0", "/transformer_blocks.3/norm1/norm/Constant_1_output_0", "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_0_transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_1", "/transformer_blocks.3/norm1_context/norm/Constant_output_0", "/transformer_blocks.3/norm1_context/norm/Constant_1_output_0", "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_0_transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_1", "onnx::MatMul_2022_onnx::MatMul_2019", "onnx::MatMul_2023_onnx::MatMul_2020", "onnx::MatMul_2024_onnx::MatMul_2021", "onnx::MatMul_2038", "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_2_gma", "/transformer_blocks.3/norm2/Constant_output_0", "/transformer_blocks.3/norm2/Constant_1_output_0", "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_3_transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_4", "onnx::MatMul_2040", "onnx::MatMul_2041", "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_5_gma", "onnx::MatMul_2091", "onnx::MatMul_2039", "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_2_gma", "/transformer_blocks.3/norm2_context/Constant_output_0", "/transformer_blocks.3/norm2_context/Constant_1_output_0", "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_3_transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_4", "onnx::MatMul_2042", "onnx::MatMul_2043", "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_5_gma", "/transformer_blocks.4/norm1/norm/Constant_output_0", "/transformer_blocks.4/norm1/norm/Constant_1_output_0", "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_0_transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_1", "/transformer_blocks.4/norm1_context/norm/Constant_output_0", "/transformer_blocks.4/norm1_context/norm/Constant_1_output_0", "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_0_transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_1", "onnx::MatMul_2047_onnx::MatMul_2044", "onnx::MatMul_2048_onnx::MatMul_2045", "onnx::MatMul_2049_onnx::MatMul_2046", "onnx::MatMul_2063", "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_2_gma", "/transformer_blocks.4/norm2/Constant_output_0", "/transformer_blocks.4/norm2/Constant_1_output_0", "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_3_transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_4", "onnx::MatMul_2065", "onnx::MatMul_2066", "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_5_gma", "onnx::MatMul_2092", "onnx::MatMul_2064", "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_2_gma", "/transformer_blocks.4/norm2_context/Constant_output_0", "/transformer_blocks.4/norm2_context/Constant_1_output_0", "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_3_transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_4", "onnx::MatMul_2067", "onnx::MatMul_2068", "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_5_gma", "/transformer_blocks.5/norm1/norm/Constant_output_0", "/transformer_blocks.5/norm1/norm/Constant_1_output_0", "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_0_transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_1", "/transformer_blocks.5/norm1_context/norm/Constant_output_0", "/transformer_blocks.5/norm1_context/norm/Constant_1_output_0", "transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_0_transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_1", "onnx::MatMul_2072_onnx::MatMul_2069", "onnx::MatMul_2073_onnx::MatMul_2070", "onnx::MatMul_2074_onnx::MatMul_2071", "onnx::MatMul_2085", "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_2_gma", "/transformer_blocks.5/norm2/Constant_output_0", "/transformer_blocks.5/norm2/Constant_1_output_0", "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_3_transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_4", "onnx::MatMul_2086", "onnx::MatMul_2087", "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_5_gma", "onnx::MatMul_2093" ] }, "super_instr": { "buffer_size": 0, "xrt_arg_id": 4, "packed_tensors": [] } }, "tensor_map": { "hidden_states_nhwc.out5_0_0": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1, 16 ], "size_in_bytes": 32, "op_tensor_size": 32, "dynamic_shapes": [ "batch_size", "w", "h", "False" ], "offset": 0 }, "/pos_embed/Reshape_1_output_0.out_35_1_2": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "False", "floor(h/2)*floor(w/2)", "False" ], "offset": 32 }, "controlnet_cond_nhwc.out5_0_1": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1, 16 ], "size_in_bytes": 32, "op_tensor_size": 32, "dynamic_shapes": [ "batch_size", "w", "h", "False" ], "offset": 3104 }, "/time_text_embed/Cast_output_0.out17_3_3": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 256 ], "size_in_bytes": 512, "op_tensor_size": 512, "dynamic_shapes": [ "batch_size", "False" ], "offset": 3136 }, "pooled_projections.out17_3_1": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 2048 ], "size_in_bytes": 4096, "op_tensor_size": 4096, "dynamic_shapes": [ "batch_size", "False" ], "offset": 3648 }, "encoder_hidden_states.out17_3_0": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 4096 ], "size_in_bytes": 8192, "op_tensor_size": 8192, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 7744 }, "/controlnet_blocks.0/Add_output_0.out17_3_13": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 0 }, "/controlnet_blocks.1/Add_output_0.out17_3_22": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 3072 }, "/controlnet_blocks.2/Add_output_0.out17_3_31": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 6144 }, "/controlnet_blocks.3/Add_output_0.out17_3_40": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 9216 }, "/controlnet_blocks.4/Add_output_0.out17_3_49": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 12288 }, "/controlnet_blocks.5/Add_output_0.out17_3_57": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 15360 }, "/pos_embed/Transpose_output_0.out5_0_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 0 }, "/pos_embed/Add_2_output_0.out_35_1_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 3072 }, "/pos_embed_input/Transpose_output_0.out5_0_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 6144 }, "/Add_output_0.out_35_1_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 9216 }, "/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 12288 }, "/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 15360 }, "/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 18432 }, "/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 21504 }, "/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 24576 }, "/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 27648 }, "/time_text_embed/Add_output_0.out_35_1_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 30720 }, "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "False" ], "offset": 33792 }, "encoder_hidden_states.out17_3_0_bfp.out25_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 4096 ], "size_in_bytes": 4096, "op_tensor_size": 4096, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 36864 }, "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 40960 }, "/context_embedder/Add_output_0.out17_3_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 42496 }, "/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 45568 }, "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 47104 }, "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 48640 }, "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 50176 }, "/transformer_blocks.0/attn/Concat_output_0.out22_0_bfp.out23_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 51712 }, "/transformer_blocks.0/attn/Concat_1_output_0.out22_0_bfp.out23_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 53248 }, "/transformer_blocks.0/attn/Concat_2_output_0.out22_0_bfp.out23_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 54784 }, "/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 56320 }, "/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 57856 }, "/transformer_blocks.0/Add_4_output_0.out10_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 59392 }, "/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 62464 }, "/transformer_blocks.0/Add_6_output_0.out0_0_3_bfp.out1_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 64000 }, "/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 65536 }, "/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 67072 }, "/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_12_bfp.out25_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 73216 }, "/transformer_blocks.0/Add_7_output_0.out10_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 74752 }, "/transformer_blocks.0/Add_output_0.out10_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 77824 }, "/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 80896 }, "/transformer_blocks.0/Add_2_output_0.out0_0_2_bfp.out1_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 82432 }, "/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 83968 }, "/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_11_bfp.out25_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 90112 }, "/transformer_blocks.0/Add_3_output_0.out10_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 91648 }, "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_4_bfp.out15_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 94720 }, "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 96256 }, "/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_5_bfp.out15_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 97792 }, "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 99328 }, "/transformer_blocks.1/attn/Concat_output_0.out22_1_bfp.out23_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 100864 }, "/transformer_blocks.1/attn/Concat_1_output_0.out22_1_bfp.out23_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 102400 }, "/transformer_blocks.1/attn/Concat_2_output_0.out22_1_bfp.out23_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 103936 }, "/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 105472 }, "/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 107008 }, "/transformer_blocks.1/Add_output_0.out10_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 108544 }, "/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 111616 }, "/transformer_blocks.1/Add_2_output_0.out0_0_6_bfp.out1_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 113152 }, "/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 114688 }, "/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_20_bfp.out25_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 120832 }, "/transformer_blocks.1/Add_3_output_0.out10_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 122368 }, "/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 125440 }, "/transformer_blocks.1/Add_4_output_0.out10_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 126976 }, "/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 130048 }, "/transformer_blocks.1/Add_6_output_0.out0_0_7_bfp.out1_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 131584 }, "/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 133120 }, "/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_21_bfp.out25_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 139264 }, "/transformer_blocks.1/Add_7_output_0.out10_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 140800 }, "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_8_bfp.out15_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 143872 }, "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 145408 }, "/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_9_bfp.out15_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 146944 }, "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 148480 }, "/transformer_blocks.2/attn/Concat_output_0.out22_2_bfp.out23_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 150016 }, "/transformer_blocks.2/attn/Concat_1_output_0.out22_2_bfp.out23_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 151552 }, "/transformer_blocks.2/attn/Concat_2_output_0.out22_2_bfp.out23_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 153088 }, "/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 154624 }, "/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 156160 }, "/transformer_blocks.2/Add_output_0.out10_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 157696 }, "/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 160768 }, "/transformer_blocks.2/Add_2_output_0.out0_0_10_bfp.out1_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 162304 }, "/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 163840 }, "/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_29_bfp.out25_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 169984 }, "/transformer_blocks.2/Add_3_output_0.out10_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 171520 }, "/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 174592 }, "/transformer_blocks.2/Add_4_output_0.out10_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 176128 }, "/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 179200 }, "/transformer_blocks.2/Add_6_output_0.out0_0_11_bfp.out1_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 180736 }, "/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 182272 }, "/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_30_bfp.out25_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 188416 }, "/transformer_blocks.2/Add_7_output_0.out10_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 189952 }, "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_12_bfp.out15_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 193024 }, "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 194560 }, "/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_13_bfp.out15_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 196096 }, "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 197632 }, "/transformer_blocks.3/attn/Concat_output_0.out22_3_bfp.out23_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 199168 }, "/transformer_blocks.3/attn/Concat_1_output_0.out22_3_bfp.out23_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 200704 }, "/transformer_blocks.3/attn/Concat_2_output_0.out22_3_bfp.out23_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 202240 }, "/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 203776 }, "/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 205312 }, "/transformer_blocks.3/Add_output_0.out10_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 206848 }, "/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 209920 }, "/transformer_blocks.3/Add_2_output_0.out0_0_14_bfp.out1_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 211456 }, "/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 212992 }, "/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_38_bfp.out25_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 219136 }, "/transformer_blocks.3/Add_3_output_0.out10_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 220672 }, "/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 223744 }, "/transformer_blocks.3/Add_4_output_0.out10_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 225280 }, "/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 228352 }, "/transformer_blocks.3/Add_6_output_0.out0_0_15_bfp.out1_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 229888 }, "/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 231424 }, "/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_39_bfp.out25_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 237568 }, "/transformer_blocks.3/Add_7_output_0.out10_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 239104 }, "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_16_bfp.out15_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 242176 }, "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 243712 }, "/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_17_bfp.out15_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 245248 }, "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 246784 }, "/transformer_blocks.4/attn/Concat_output_0.out22_4_bfp.out23_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 248320 }, "/transformer_blocks.4/attn/Concat_1_output_0.out22_4_bfp.out23_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 249856 }, "/transformer_blocks.4/attn/Concat_2_output_0.out22_4_bfp.out23_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 251392 }, "/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 252928 }, "/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 254464 }, "/transformer_blocks.4/Add_output_0.out10_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 256000 }, "/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 259072 }, "/transformer_blocks.4/Add_2_output_0.out0_0_18_bfp.out1_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 260608 }, "/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 262144 }, "/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_47_bfp.out25_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 268288 }, "/transformer_blocks.4/Add_3_output_0.out10_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 269824 }, "/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 272896 }, "/transformer_blocks.4/Add_4_output_0.out10_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 274432 }, "/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 277504 }, "/transformer_blocks.4/Add_6_output_0.out0_0_19_bfp.out1_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 279040 }, "/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 280576 }, "/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_48_bfp.out25_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 286720 }, "/transformer_blocks.4/Add_7_output_0.out10_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 288256 }, "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_20_bfp.out15_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 291328 }, "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 292864 }, "/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_21_bfp.out15_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 294400 }, "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length", "False" ], "offset": 295936 }, "/transformer_blocks.5/attn/Concat_output_0.out22_5_bfp.out23_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 297472 }, "/transformer_blocks.5/attn/Concat_1_output_0.out22_5_bfp.out23_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 299008 }, "/transformer_blocks.5/attn/Concat_2_output_0.out22_5_bfp.out23_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 24, 1, 64 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "False", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 300544 }, "/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "max_length + floor(h/2)*floor(w/2)", "False" ], "offset": 302080 }, "/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 303616 }, "/transformer_blocks.5/Add_output_0.out10_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 305152 }, "/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 308224 }, "/transformer_blocks.5/Add_2_output_0.out0_0_22_bfp.out1_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 309760 }, "/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 6144 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 311296 }, "/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_56_bfp.out25_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "uint8", "shape": [ 1, 1, 1536 ], "size_in_bytes": 1536, "op_tensor_size": 1536, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 317440 }, "/transformer_blocks.5/Add_3_output_0.out10_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "dynamic_shapes": [ "batch_size", "floor(h/2)*floor(w/2)", "False" ], "offset": 318976 }, "pos_embed.proj.weight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 135168 ], "size_in_bytes": 135168, "op_tensor_size": 135168, "offset": 0, "file_name": "cache/pos_embedprojConv_0.const", "file_size": 135168 }, "pos_embed_input.proj.weight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 135168 ], "size_in_bytes": 135168, "op_tensor_size": 135168, "offset": 135168, "file_name": "cache/pos_embedprojConv_1.const", "file_size": 135168 }, "time_text_embed.timestep_embedder.linear_1.weight_5_1_2": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 448512 ], "size_in_bytes": 448512, "op_tensor_size": 448512, "offset": 270336, "file_name": "cache/pos_embedprojConv_2.const", "file_size": 448512 }, "/time_text_embed/timestep_embedder/act/Sigmoid.weights2_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 128 ], "size_in_bytes": 256, "op_tensor_size": 256, "offset": 718848, "file_name": "cache/pos_embedprojConv_3.const", "file_size": 256 }, "time_text_embed.timestep_embedder.linear_2.weight_5_1_3": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 719104, "file_name": "cache/pos_embedprojConv_4.const", "file_size": 2691072 }, "time_text_embed.text_embedder.linear_1.weight_5_1_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 3588096 ], "size_in_bytes": 3588096, "op_tensor_size": 3588096, "offset": 3410176, "file_name": "cache/pos_embedprojConv_5.const", "file_size": 3588096 }, "/time_text_embed/text_embedder/act_1/Sigmoid.weights2_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 128 ], "size_in_bytes": 256, "op_tensor_size": 256, "offset": 6998272, "file_name": "cache/pos_embedprojConv_6.const", "file_size": 256 }, "time_text_embed.text_embedder.linear_2.weight_5_1_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 6998528, "file_name": "cache/pos_embedprojConv_7.const", "file_size": 2691072 }, "/transformer_blocks.0/norm1/silu/Sigmoid.weights2_2": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 128 ], "size_in_bytes": 256, "op_tensor_size": 256, "offset": 9689600, "file_name": "cache/pos_embedprojConv_8.const", "file_size": 256 }, "encoder_hidden_states.out17_3_0_bfp.wts": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 64 ], "size_in_bytes": 128, "op_tensor_size": 128, "offset": 9689856, "file_name": "cache/pos_embedprojConv_9.const", "file_size": 128 }, "onnx::MatMul_1943": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 7274496 ], "size_in_bytes": 7274496, "op_tensor_size": 7274496, "offset": 9689984, "file_name": "cache/pos_embedprojConv_10.const", "file_size": 7274496 }, "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_bfp.wts": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 64 ], "size_in_bytes": 128, "op_tensor_size": 128, "offset": 16964480, "file_name": "cache/pos_embedprojConv_11.const", "file_size": 128 }, "/transformer_blocks.0/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 16964608, "file_name": "cache/pos_embedprojConv_12.const", "file_size": 3072 }, "/transformer_blocks.0/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 16967680, "file_name": "cache/pos_embedprojConv_13.const", "file_size": 3072 }, "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_1_transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 16970752, "file_name": "cache/pos_embedprojConv_14.const", "file_size": 5382144 }, "/transformer_blocks.0/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 22352896, "file_name": "cache/pos_embedprojConv_15.const", "file_size": 3072 }, "/transformer_blocks.0/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 22355968, "file_name": "cache/pos_embedprojConv_16.const", "file_size": 3072 }, "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_4_transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_3": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 22359040, "file_name": "cache/pos_embedprojConv_17.const", "file_size": 5382144 }, "onnx::MatMul_1947_onnx::MatMul_1944": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 27741184, "file_name": "cache/pos_embedprojConv_18.const", "file_size": 5382144 }, "onnx::MatMul_1948_onnx::MatMul_1945": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 33123328, "file_name": "cache/pos_embedprojConv_19.const", "file_size": 5382144 }, "onnx::MatMul_1949_onnx::MatMul_1946": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 38505472, "file_name": "cache/pos_embedprojConv_20.const", "file_size": 5382144 }, "onnx::MatMul_1964": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 43887616, "file_name": "cache/pos_embedprojConv_21.const", "file_size": 2691072 }, "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_4_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 46578688, "file_name": "cache/pos_embedprojConv_22.const", "file_size": 2691072 }, "/transformer_blocks.0/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 49269760, "file_name": "cache/pos_embedprojConv_23.const", "file_size": 3072 }, "/transformer_blocks.0/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 49272832, "file_name": "cache/pos_embedprojConv_24.const", "file_size": 3072 }, "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_3_transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_2": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 49275904, "file_name": "cache/pos_embedprojConv_25.const", "file_size": 5382144 }, "onnx::MatMul_1963": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 54658048, "file_name": "cache/pos_embedprojConv_26.const", "file_size": 2691072 }, "onnx::MatMul_1967": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 57349120, "file_name": "cache/pos_embedprojConv_27.const", "file_size": 10764288 }, "onnx::MatMul_1968": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 68113408, "file_name": "cache/pos_embedprojConv_28.const", "file_size": 10764288 }, "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 78877696, "file_name": "cache/pos_embedprojConv_29.const", "file_size": 2691072 }, "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 81568768, "file_name": "cache/pos_embedprojConv_30.const", "file_size": 2691072 }, "/transformer_blocks.0/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 84259840, "file_name": "cache/pos_embedprojConv_31.const", "file_size": 3072 }, "/transformer_blocks.0/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 84262912, "file_name": "cache/pos_embedprojConv_32.const", "file_size": 3072 }, "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_1_transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 84265984, "file_name": "cache/pos_embedprojConv_33.const", "file_size": 5382144 }, "onnx::MatMul_1965": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 89648128, "file_name": "cache/pos_embedprojConv_34.const", "file_size": 10764288 }, "onnx::MatMul_1966": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 100412416, "file_name": "cache/pos_embedprojConv_35.const", "file_size": 10764288 }, "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 111176704, "file_name": "cache/pos_embedprojConv_36.const", "file_size": 2691072 }, "/transformer_blocks.1/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 113867776, "file_name": "cache/pos_embedprojConv_37.const", "file_size": 3072 }, "/transformer_blocks.1/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 113870848, "file_name": "cache/pos_embedprojConv_38.const", "file_size": 3072 }, "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_0_transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 113873920, "file_name": "cache/pos_embedprojConv_39.const", "file_size": 5382144 }, "onnx::MatMul_2088": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 119256064, "file_name": "cache/pos_embedprojConv_40.const", "file_size": 2691072 }, "/transformer_blocks.1/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 121947136, "file_name": "cache/pos_embedprojConv_41.const", "file_size": 3072 }, "/transformer_blocks.1/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 121950208, "file_name": "cache/pos_embedprojConv_42.const", "file_size": 3072 }, "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_0_transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 121953280, "file_name": "cache/pos_embedprojConv_43.const", "file_size": 5382144 }, "onnx::MatMul_1972_onnx::MatMul_1969": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 127335424, "file_name": "cache/pos_embedprojConv_44.const", "file_size": 5382144 }, "onnx::MatMul_1973_onnx::MatMul_1970": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 132717568, "file_name": "cache/pos_embedprojConv_45.const", "file_size": 5382144 }, "onnx::MatMul_1974_onnx::MatMul_1971": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 138099712, "file_name": "cache/pos_embedprojConv_46.const", "file_size": 5382144 }, "onnx::MatMul_1988": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 143481856, "file_name": "cache/pos_embedprojConv_47.const", "file_size": 2691072 }, "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 146172928, "file_name": "cache/pos_embedprojConv_48.const", "file_size": 2691072 }, "/transformer_blocks.1/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 148864000, "file_name": "cache/pos_embedprojConv_49.const", "file_size": 3072 }, "/transformer_blocks.1/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 148867072, "file_name": "cache/pos_embedprojConv_50.const", "file_size": 3072 }, "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_3_transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 148870144, "file_name": "cache/pos_embedprojConv_51.const", "file_size": 5382144 }, "onnx::MatMul_1990": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 154252288, "file_name": "cache/pos_embedprojConv_52.const", "file_size": 10764288 }, "onnx::MatMul_1991": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 165016576, "file_name": "cache/pos_embedprojConv_53.const", "file_size": 10764288 }, "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 175780864, "file_name": "cache/pos_embedprojConv_54.const", "file_size": 2691072 }, "onnx::MatMul_2089": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 178471936, "file_name": "cache/pos_embedprojConv_55.const", "file_size": 2691072 }, "onnx::MatMul_1989": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 181163008, "file_name": "cache/pos_embedprojConv_56.const", "file_size": 2691072 }, "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 183854080, "file_name": "cache/pos_embedprojConv_57.const", "file_size": 2691072 }, "/transformer_blocks.1/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 186545152, "file_name": "cache/pos_embedprojConv_58.const", "file_size": 3072 }, "/transformer_blocks.1/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 186548224, "file_name": "cache/pos_embedprojConv_59.const", "file_size": 3072 }, "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_3_transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 186551296, "file_name": "cache/pos_embedprojConv_60.const", "file_size": 5382144 }, "onnx::MatMul_1992": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 191933440, "file_name": "cache/pos_embedprojConv_61.const", "file_size": 10764288 }, "onnx::MatMul_1993": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 202697728, "file_name": "cache/pos_embedprojConv_62.const", "file_size": 10764288 }, "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 213462016, "file_name": "cache/pos_embedprojConv_63.const", "file_size": 2691072 }, "/transformer_blocks.2/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 216153088, "file_name": "cache/pos_embedprojConv_64.const", "file_size": 3072 }, "/transformer_blocks.2/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 216156160, "file_name": "cache/pos_embedprojConv_65.const", "file_size": 3072 }, "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_0_transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 216159232, "file_name": "cache/pos_embedprojConv_66.const", "file_size": 5382144 }, "/transformer_blocks.2/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 221541376, "file_name": "cache/pos_embedprojConv_67.const", "file_size": 3072 }, "/transformer_blocks.2/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 221544448, "file_name": "cache/pos_embedprojConv_68.const", "file_size": 3072 }, "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_0_transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 221547520, "file_name": "cache/pos_embedprojConv_69.const", "file_size": 5382144 }, "onnx::MatMul_1997_onnx::MatMul_1994": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 226929664, "file_name": "cache/pos_embedprojConv_70.const", "file_size": 5382144 }, "onnx::MatMul_1998_onnx::MatMul_1995": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 232311808, "file_name": "cache/pos_embedprojConv_71.const", "file_size": 5382144 }, "onnx::MatMul_1999_onnx::MatMul_1996": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 237693952, "file_name": "cache/pos_embedprojConv_72.const", "file_size": 5382144 }, "onnx::MatMul_2013": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 243076096, "file_name": "cache/pos_embedprojConv_73.const", "file_size": 2691072 }, "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 245767168, "file_name": "cache/pos_embedprojConv_74.const", "file_size": 2691072 }, "/transformer_blocks.2/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 248458240, "file_name": "cache/pos_embedprojConv_75.const", "file_size": 3072 }, "/transformer_blocks.2/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 248461312, "file_name": "cache/pos_embedprojConv_76.const", "file_size": 3072 }, "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_3_transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 248464384, "file_name": "cache/pos_embedprojConv_77.const", "file_size": 5382144 }, "onnx::MatMul_2015": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 253846528, "file_name": "cache/pos_embedprojConv_78.const", "file_size": 10764288 }, "onnx::MatMul_2016": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 264610816, "file_name": "cache/pos_embedprojConv_79.const", "file_size": 10764288 }, "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 275375104, "file_name": "cache/pos_embedprojConv_80.const", "file_size": 2691072 }, "onnx::MatMul_2090": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 278066176, "file_name": "cache/pos_embedprojConv_81.const", "file_size": 2691072 }, "onnx::MatMul_2014": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 280757248, "file_name": "cache/pos_embedprojConv_82.const", "file_size": 2691072 }, "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 283448320, "file_name": "cache/pos_embedprojConv_83.const", "file_size": 2691072 }, "/transformer_blocks.2/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 286139392, "file_name": "cache/pos_embedprojConv_84.const", "file_size": 3072 }, "/transformer_blocks.2/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 286142464, "file_name": "cache/pos_embedprojConv_85.const", "file_size": 3072 }, "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_3_transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 286145536, "file_name": "cache/pos_embedprojConv_86.const", "file_size": 5382144 }, "onnx::MatMul_2017": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 291527680, "file_name": "cache/pos_embedprojConv_87.const", "file_size": 10764288 }, "onnx::MatMul_2018": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 302291968, "file_name": "cache/pos_embedprojConv_88.const", "file_size": 10764288 }, "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 313056256, "file_name": "cache/pos_embedprojConv_89.const", "file_size": 2691072 }, "/transformer_blocks.3/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 315747328, "file_name": "cache/pos_embedprojConv_90.const", "file_size": 3072 }, "/transformer_blocks.3/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 315750400, "file_name": "cache/pos_embedprojConv_91.const", "file_size": 3072 }, "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_0_transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 315753472, "file_name": "cache/pos_embedprojConv_92.const", "file_size": 5382144 }, "/transformer_blocks.3/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 321135616, "file_name": "cache/pos_embedprojConv_93.const", "file_size": 3072 }, "/transformer_blocks.3/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 321138688, "file_name": "cache/pos_embedprojConv_94.const", "file_size": 3072 }, "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_0_transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 321141760, "file_name": "cache/pos_embedprojConv_95.const", "file_size": 5382144 }, "onnx::MatMul_2022_onnx::MatMul_2019": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 326523904, "file_name": "cache/pos_embedprojConv_96.const", "file_size": 5382144 }, "onnx::MatMul_2023_onnx::MatMul_2020": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 331906048, "file_name": "cache/pos_embedprojConv_97.const", "file_size": 5382144 }, "onnx::MatMul_2024_onnx::MatMul_2021": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 337288192, "file_name": "cache/pos_embedprojConv_98.const", "file_size": 5382144 }, "onnx::MatMul_2038": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 342670336, "file_name": "cache/pos_embedprojConv_99.const", "file_size": 2691072 }, "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 345361408, "file_name": "cache/pos_embedprojConv_100.const", "file_size": 2691072 }, "/transformer_blocks.3/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 348052480, "file_name": "cache/pos_embedprojConv_101.const", "file_size": 3072 }, "/transformer_blocks.3/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 348055552, "file_name": "cache/pos_embedprojConv_102.const", "file_size": 3072 }, "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_3_transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 348058624, "file_name": "cache/pos_embedprojConv_103.const", "file_size": 5382144 }, "onnx::MatMul_2040": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 353440768, "file_name": "cache/pos_embedprojConv_104.const", "file_size": 10764288 }, "onnx::MatMul_2041": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 364205056, "file_name": "cache/pos_embedprojConv_105.const", "file_size": 10764288 }, "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 374969344, "file_name": "cache/pos_embedprojConv_106.const", "file_size": 2691072 }, "onnx::MatMul_2091": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 377660416, "file_name": "cache/pos_embedprojConv_107.const", "file_size": 2691072 }, "onnx::MatMul_2039": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 380351488, "file_name": "cache/pos_embedprojConv_108.const", "file_size": 2691072 }, "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 383042560, "file_name": "cache/pos_embedprojConv_109.const", "file_size": 2691072 }, "/transformer_blocks.3/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 385733632, "file_name": "cache/pos_embedprojConv_110.const", "file_size": 3072 }, "/transformer_blocks.3/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 385736704, "file_name": "cache/pos_embedprojConv_111.const", "file_size": 3072 }, "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_3_transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 385739776, "file_name": "cache/pos_embedprojConv_112.const", "file_size": 5382144 }, "onnx::MatMul_2042": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 391121920, "file_name": "cache/pos_embedprojConv_113.const", "file_size": 10764288 }, "onnx::MatMul_2043": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 401886208, "file_name": "cache/pos_embedprojConv_114.const", "file_size": 10764288 }, "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 412650496, "file_name": "cache/pos_embedprojConv_115.const", "file_size": 2691072 }, "/transformer_blocks.4/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 415341568, "file_name": "cache/pos_embedprojConv_116.const", "file_size": 3072 }, "/transformer_blocks.4/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 415344640, "file_name": "cache/pos_embedprojConv_117.const", "file_size": 3072 }, "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_0_transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 415347712, "file_name": "cache/pos_embedprojConv_118.const", "file_size": 5382144 }, "/transformer_blocks.4/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 420729856, "file_name": "cache/pos_embedprojConv_119.const", "file_size": 3072 }, "/transformer_blocks.4/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 420732928, "file_name": "cache/pos_embedprojConv_120.const", "file_size": 3072 }, "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_0_transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 420736000, "file_name": "cache/pos_embedprojConv_121.const", "file_size": 5382144 }, "onnx::MatMul_2047_onnx::MatMul_2044": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 426118144, "file_name": "cache/pos_embedprojConv_122.const", "file_size": 5382144 }, "onnx::MatMul_2048_onnx::MatMul_2045": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 431500288, "file_name": "cache/pos_embedprojConv_123.const", "file_size": 5382144 }, "onnx::MatMul_2049_onnx::MatMul_2046": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 436882432, "file_name": "cache/pos_embedprojConv_124.const", "file_size": 5382144 }, "onnx::MatMul_2063": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 442264576, "file_name": "cache/pos_embedprojConv_125.const", "file_size": 2691072 }, "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 444955648, "file_name": "cache/pos_embedprojConv_126.const", "file_size": 2691072 }, "/transformer_blocks.4/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 447646720, "file_name": "cache/pos_embedprojConv_127.const", "file_size": 3072 }, "/transformer_blocks.4/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 447649792, "file_name": "cache/pos_embedprojConv_128.const", "file_size": 3072 }, "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_3_transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 447652864, "file_name": "cache/pos_embedprojConv_129.const", "file_size": 5382144 }, "onnx::MatMul_2065": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 453035008, "file_name": "cache/pos_embedprojConv_130.const", "file_size": 10764288 }, "onnx::MatMul_2066": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 463799296, "file_name": "cache/pos_embedprojConv_131.const", "file_size": 10764288 }, "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 474563584, "file_name": "cache/pos_embedprojConv_132.const", "file_size": 2691072 }, "onnx::MatMul_2092": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 477254656, "file_name": "cache/pos_embedprojConv_133.const", "file_size": 2691072 }, "onnx::MatMul_2064": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 479945728, "file_name": "cache/pos_embedprojConv_134.const", "file_size": 2691072 }, "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 482636800, "file_name": "cache/pos_embedprojConv_135.const", "file_size": 2691072 }, "/transformer_blocks.4/norm2_context/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 485327872, "file_name": "cache/pos_embedprojConv_136.const", "file_size": 3072 }, "/transformer_blocks.4/norm2_context/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 485330944, "file_name": "cache/pos_embedprojConv_137.const", "file_size": 3072 }, "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_3_transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 485334016, "file_name": "cache/pos_embedprojConv_138.const", "file_size": 5382144 }, "onnx::MatMul_2067": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 490716160, "file_name": "cache/pos_embedprojConv_139.const", "file_size": 10764288 }, "onnx::MatMul_2068": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 501480448, "file_name": "cache/pos_embedprojConv_140.const", "file_size": 10764288 }, "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 512244736, "file_name": "cache/pos_embedprojConv_141.const", "file_size": 2691072 }, "/transformer_blocks.5/norm1/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 514935808, "file_name": "cache/pos_embedprojConv_142.const", "file_size": 3072 }, "/transformer_blocks.5/norm1/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 514938880, "file_name": "cache/pos_embedprojConv_143.const", "file_size": 3072 }, "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_0_transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 514941952, "file_name": "cache/pos_embedprojConv_144.const", "file_size": 5382144 }, "/transformer_blocks.5/norm1_context/norm/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 520324096, "file_name": "cache/pos_embedprojConv_145.const", "file_size": 3072 }, "/transformer_blocks.5/norm1_context/norm/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 520327168, "file_name": "cache/pos_embedprojConv_146.const", "file_size": 3072 }, "transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_0_transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 520330240, "file_name": "cache/pos_embedprojConv_147.const", "file_size": 5382144 }, "onnx::MatMul_2072_onnx::MatMul_2069": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 525712384, "file_name": "cache/pos_embedprojConv_148.const", "file_size": 5382144 }, "onnx::MatMul_2073_onnx::MatMul_2070": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 531094528, "file_name": "cache/pos_embedprojConv_149.const", "file_size": 5382144 }, "onnx::MatMul_2074_onnx::MatMul_2071": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 536476672, "file_name": "cache/pos_embedprojConv_150.const", "file_size": 5382144 }, "onnx::MatMul_2085": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 541858816, "file_name": "cache/pos_embedprojConv_151.const", "file_size": 2691072 }, "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_2_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 544549888, "file_name": "cache/pos_embedprojConv_152.const", "file_size": 2691072 }, "/transformer_blocks.5/norm2/Constant_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 547240960, "file_name": "cache/pos_embedprojConv_153.const", "file_size": 3072 }, "/transformer_blocks.5/norm2/Constant_1_output_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 547244032, "file_name": "cache/pos_embedprojConv_154.const", "file_size": 3072 }, "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_3_transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 5382144 ], "size_in_bytes": 5382144, "op_tensor_size": 5382144, "offset": 547247104, "file_name": "cache/pos_embedprojConv_155.const", "file_size": 5382144 }, "onnx::MatMul_2086": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 552629248, "file_name": "cache/pos_embedprojConv_156.const", "file_size": 10764288 }, "onnx::MatMul_2087": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 10764288 ], "size_in_bytes": 10764288, "op_tensor_size": 10764288, "offset": 563393536, "file_name": "cache/pos_embedprojConv_157.const", "file_size": 10764288 }, "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_5_gma": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 574157824, "file_name": "cache/pos_embedprojConv_158.const", "file_size": 2691072 }, "onnx::MatMul_2093": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ 2691072 ], "size_in_bytes": 2691072, "op_tensor_size": 2691072, "offset": 576848896, "file_name": "cache/pos_embedprojConv_159.const", "file_size": 2691072 } }, "dynamic_shape_subgraph": true, "dynamic_shape_list": [ { "floor(h/2)": 32, "max_length + floor(h/2)*floor(w/2)": 1184, "h": 64, "w": 64, "floor(h/2)*floor(w/2)": 1024, "batch_size": 2, "max_length": 160, "floor(w/2)": 32 }, { "floor(h/2)": 32, "max_length + floor(h/2)*floor(w/2)": 1696, "h": 64, "w": 96, "floor(h/2)*floor(w/2)": 1536, "batch_size": 2, "max_length": 160, "floor(w/2)": 48 }, { "floor(h/2)": 48, "max_length + floor(h/2)*floor(w/2)": 1696, "h": 96, "w": 64, "floor(h/2)*floor(w/2)": 1536, "batch_size": 2, "max_length": 160, "floor(w/2)": 32 }, { "floor(h/2)": 36, "max_length + floor(h/2)*floor(w/2)": 2464, "h": 72, "w": 128, "floor(h/2)*floor(w/2)": 2304, "batch_size": 2, "max_length": 160, "floor(w/2)": 64 }, { "floor(h/2)": 64, "max_length + floor(h/2)*floor(w/2)": 2464, "h": 128, "w": 72, "floor(h/2)*floor(w/2)": 2304, "batch_size": 2, "max_length": 160, "floor(w/2)": 36 }, { "floor(h/2)": 48, "max_length + floor(h/2)*floor(w/2)": 3232, "h": 96, "w": 128, "floor(h/2)*floor(w/2)": 3072, "batch_size": 2, "max_length": 160, "floor(w/2)": 64 }, { "floor(h/2)": 64, "max_length + floor(h/2)*floor(w/2)": 3232, "h": 128, "w": 96, "floor(h/2)*floor(w/2)": 3072, "batch_size": 2, "max_length": 160, "floor(w/2)": 48 }, { "floor(h/2)": 64, "max_length + floor(h/2)*floor(w/2)": 4256, "h": 128, "w": 128, "floor(h/2)*floor(w/2)": 4096, "batch_size": 2, "max_length": 160, "floor(w/2)": 64 } ], "aux_info": {} }