kateh's picture
Mirror from amd/stable-diffusion-3-medium-amdnpu
cdc9057 verified
{
"dd_meta_major_version": 1,
"dd_meta_minor_version": 4,
"state_table_updates": [],
"op_list": [
{
"name": "/pos_embed/proj/Conv",
"type": "SDConv",
"in_args": [
"hidden_states_nhwc.out5_0_0"
],
"const_args": [
"pos_embed.proj.weight"
],
"out_args": [
"/pos_embed/Transpose_output_0.out5_0_0"
],
"attrs": {
"auto_pad": {
"type": "str",
"value": [
"NOTSET"
]
},
"dilations": {
"type": "int",
"value": [
"1",
"1"
]
},
"group": {
"type": "int",
"value": [
"1"
]
},
"kernel_shape": {
"type": "int",
"value": [
"2",
"2"
]
},
"pads": {
"type": "int",
"value": [
"0",
"0",
"0",
"0"
]
},
"strides": {
"type": "int",
"value": [
"2",
"2"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"w",
"h",
"16"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(w/2)",
"floor(h/2)",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"2",
"2",
"16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"float"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/pos_embed/Add_2",
"type": "SDAdd",
"in_args": [
"/pos_embed/Transpose_output_0.out5_0_0",
"/pos_embed/Reshape_1_output_0.out_35_1_2"
],
"const_args": [],
"out_args": [
"/pos_embed/Add_2_output_0.out_35_1_2"
],
"attrs": {
"a_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"b_shape": {
"type": "str",
"value": [
"1",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"c_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"is_bias_add": {
"type": "int",
"value": [
"0"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/pos_embed_input/proj/Conv",
"type": "SDConv",
"in_args": [
"controlnet_cond_nhwc.out5_0_1"
],
"const_args": [
"pos_embed_input.proj.weight"
],
"out_args": [
"/pos_embed_input/Transpose_output_0.out5_0_1"
],
"attrs": {
"auto_pad": {
"type": "str",
"value": [
"NOTSET"
]
},
"dilations": {
"type": "int",
"value": [
"1",
"1"
]
},
"group": {
"type": "int",
"value": [
"1"
]
},
"kernel_shape": {
"type": "int",
"value": [
"2",
"2"
]
},
"pads": {
"type": "int",
"value": [
"0",
"0",
"0",
"0"
]
},
"strides": {
"type": "int",
"value": [
"2",
"2"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"w",
"h",
"16"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(w/2)",
"floor(h/2)",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"2",
"2",
"16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"float"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/Add",
"type": "SDAdd",
"in_args": [
"/pos_embed/Add_2_output_0.out_35_1_2",
"/pos_embed_input/Transpose_output_0.out5_0_1"
],
"const_args": [],
"out_args": [
"/Add_output_0.out_35_1_3"
],
"attrs": {
"a_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"b_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"c_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"is_bias_add": {
"type": "int",
"value": [
"0"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/time_text_embed/timestep_embedder/linear_1/Gemm",
"type": "SDGemm",
"in_args": [
"/time_text_embed/Cast_output_0.out17_3_3"
],
"const_args": [
"time_text_embed.timestep_embedder.linear_1.weight_5_1_2"
],
"out_args": [
"/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"1",
"256"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"256",
"1536"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/time_text_embed/timestep_embedder/act/Sigmoid",
"type": "SDSilu",
"in_args": [
"/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3"
],
"const_args": [
"/time_text_embed/timestep_embedder/act/Sigmoid.weights2_1"
],
"out_args": [
"/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"weight_shape": {
"type": "int",
"value": [
"128"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/time_text_embed/timestep_embedder/linear_2/Gemm",
"type": "SDGemm",
"in_args": [
"/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1"
],
"const_args": [
"time_text_embed.timestep_embedder.linear_2.weight_5_1_3"
],
"out_args": [
"/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/time_text_embed/text_embedder/linear_1/Gemm",
"type": "SDGemm",
"in_args": [
"pooled_projections.out17_3_1"
],
"const_args": [
"time_text_embed.text_embedder.linear_1.weight_5_1_0"
],
"out_args": [
"/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"1",
"2048"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"2048",
"1536"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/time_text_embed/text_embedder/act_1/Sigmoid",
"type": "SDSilu",
"in_args": [
"/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1"
],
"const_args": [
"/time_text_embed/text_embedder/act_1/Sigmoid.weights2_0"
],
"out_args": [
"/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"weight_shape": {
"type": "int",
"value": [
"128"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/time_text_embed/text_embedder/linear_2/Gemm",
"type": "SDGemm",
"in_args": [
"/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0"
],
"const_args": [
"time_text_embed.text_embedder.linear_2.weight_5_1_1"
],
"out_args": [
"/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/time_text_embed/Add",
"type": "SDAdd",
"in_args": [
"/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4",
"/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2"
],
"const_args": [],
"out_args": [
"/time_text_embed/Add_output_0.out_35_1_4"
],
"attrs": {
"a_shape": {
"type": "str",
"value": [
"batch_size",
"1536"
]
},
"b_shape": {
"type": "str",
"value": [
"batch_size",
"1536"
]
},
"c_shape": {
"type": "str",
"value": [
"batch_size",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"is_bias_add": {
"type": "int",
"value": [
"0"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/norm1/silu/Sigmoid",
"type": "SDSilu",
"in_args": [
"/time_text_embed/Add_output_0.out_35_1_4"
],
"const_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid.weights2_2"
],
"out_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"weight_shape": {
"type": "int",
"value": [
"128"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "encoder_hidden_states.out17_3_0_SDCastBf2Bfp",
"type": "SDCastBf2Bfp",
"in_args": [
"encoder_hidden_states.out17_3_0"
],
"const_args": [
"encoder_hidden_states.out17_3_0_bfp.wts"
],
"out_args": [
"encoder_hidden_states.out17_3_0_bfp.out25_0"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"4096"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"4096"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/context_embedder/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"encoder_hidden_states.out17_3_0_bfp.out25_0"
],
"const_args": [
"onnx::MatMul_1943"
],
"out_args": [
"/context_embedder/Add_output_0.out17_3_0_bfp.out25_0"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"4096"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"4096",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_SDCastBfp2Bf",
"type": "SDCastBfp2Bf",
"in_args": [
"/context_embedder/Add_output_0.out17_3_0_bfp.out25_0"
],
"const_args": [
"/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_bfp.wts"
],
"out_args": [
"/context_embedder/Add_output_0.out17_3_0"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/norm1_context/norm/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/context_embedder/Add_output_0.out17_3_0"
],
"const_args": [
"/transformer_blocks.0/norm1_context/norm/Constant_output_0",
"/transformer_blocks.0/norm1_context/norm/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/norm1_context/Add_2",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0"
],
"const_args": [
"transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_1_transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_0"
],
"out_args": [
"/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/norm1/norm/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/Add_output_0.out_35_1_3"
],
"const_args": [
"/transformer_blocks.0/norm1/norm/Constant_output_0",
"/transformer_blocks.0/norm1/norm/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/norm1/Add_2",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1"
],
"const_args": [
"transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_4_transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_3"
],
"out_args": [
"/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_22"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/attn/Concat",
"type": "SDGemmConcat_bfp",
"in_args": [
"/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0",
"/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_22"
],
"const_args": [
"onnx::MatMul_1947_onnx::MatMul_1944"
],
"out_args": [
"/transformer_blocks.0/attn/Concat_output_0.out22_0_bfp.out23_0"
],
"attrs": {
"trans_head": {
"type": "int",
"value": [
"1"
]
},
"head_num": {
"type": "int",
"value": [
"24"
]
},
"concat_axis": {
"type": "int",
"value": [
"1"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/attn/Concat_1",
"type": "SDGemmConcat_bfp",
"in_args": [
"/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0",
"/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_22"
],
"const_args": [
"onnx::MatMul_1948_onnx::MatMul_1945"
],
"out_args": [
"/transformer_blocks.0/attn/Concat_1_output_0.out22_0_bfp.out23_1"
],
"attrs": {
"trans_head": {
"type": "int",
"value": [
"1"
]
},
"head_num": {
"type": "int",
"value": [
"24"
]
},
"concat_axis": {
"type": "int",
"value": [
"1"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/attn/Concat_2",
"type": "SDGemmConcat_bfp",
"in_args": [
"/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0",
"/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_22"
],
"const_args": [
"onnx::MatMul_1949_onnx::MatMul_1946"
],
"out_args": [
"/transformer_blocks.0/attn/Concat_2_output_0.out22_0_bfp.out23_2"
],
"attrs": {
"head_num": {
"type": "int",
"value": [
"24"
]
},
"concat_axis": {
"type": "int",
"value": [
"1"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"trans_head": {
"type": "int",
"value": [
"3"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/attn/MatMulmha_18_0_0",
"type": "SDMHA_bfp",
"in_args": [
"/transformer_blocks.0/attn/Concat_output_0.out22_0_bfp.out23_0",
"/transformer_blocks.0/attn/Concat_1_output_0.out22_0_bfp.out23_1",
"/transformer_blocks.0/attn/Concat_2_output_0.out22_0_bfp.out23_2"
],
"const_args": [],
"out_args": [
"/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0"
],
"attrs": {
"num_heads": {
"type": "int",
"value": [
"24"
]
},
"unidirectional": {
"type": "int",
"value": [
"0"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"64",
"max_length + floor(h/2)*floor(w/2)"
]
},
"op_version": {
"type": "str",
"value": [
"v2"
]
},
"is_flash_mha": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/attn/to_add_out/MatMul",
"type": "SDSliceGemm_bfp",
"in_args": [
"/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0"
],
"const_args": [
"onnx::MatMul_1964"
],
"out_args": [
"/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1"
],
"attrs": {
"axes": {
"type": "int",
"value": [
"1"
]
},
"slice_shape": {
"type": "str",
"value": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"1536"
]
},
"start": {
"type": "str",
"value": [
"floor(h/2)*floor(w/2)"
]
},
"end": {
"type": "int",
"value": [
"2147483647"
]
},
"step": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/Add_4",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1",
"/context_embedder/Add_output_0.out17_3_0"
],
"const_args": [
"transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_4_gma"
],
"out_args": [
"/transformer_blocks.0/Add_4_output_0.out10_0"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/norm2_context/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.0/Add_4_output_0.out10_0"
],
"const_args": [
"/transformer_blocks.0/norm2_context/Constant_output_0",
"/transformer_blocks.0/norm2_context/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/Add_6",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3"
],
"const_args": [
"transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_3_transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_2"
],
"out_args": [
"/transformer_blocks.0/Add_6_output_0.out0_0_3_bfp.out1_1"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/attn/to_out.0/MatMul",
"type": "SDSliceGemm_bfp",
"in_args": [
"/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0"
],
"const_args": [
"onnx::MatMul_1963"
],
"out_args": [
"/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0"
],
"attrs": {
"axes": {
"type": "int",
"value": [
"1"
]
},
"slice_shape": {
"type": "str",
"value": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"1536"
]
},
"start": {
"type": "int",
"value": [
"0"
]
},
"end": {
"type": "str",
"value": [
"floor(h/2)*floor(w/2)"
]
},
"step": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/ff_context/net.0/proj/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.0/Add_6_output_0.out0_0_3_bfp.out1_1"
],
"const_args": [
"onnx::MatMul_1967"
],
"out_args": [
"/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_6"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"6144"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"6144"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"nonlinear": {
"type": "str",
"value": [
"Gelu"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/ff_context/net.2/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_6"
],
"const_args": [
"onnx::MatMul_1968"
],
"out_args": [
"/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_12_bfp.out25_8"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"6144"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"6144",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/Add_7",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_12_bfp.out25_8",
"/transformer_blocks.0/Add_4_output_0.out10_0"
],
"const_args": [
"transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_5_gma"
],
"out_args": [
"/transformer_blocks.0/Add_7_output_0.out10_1"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/Add",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0",
"/Add_output_0.out_35_1_3"
],
"const_args": [
"transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_2_gma"
],
"out_args": [
"/transformer_blocks.0/Add_output_0.out10_20"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/norm2/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.0/Add_output_0.out10_20"
],
"const_args": [
"/transformer_blocks.0/norm2/Constant_output_0",
"/transformer_blocks.0/norm2/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/Add_2",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2"
],
"const_args": [
"transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_1_transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_0"
],
"out_args": [
"/transformer_blocks.0/Add_2_output_0.out0_0_2_bfp.out1_21"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/ff/net.0/proj/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.0/Add_2_output_0.out0_0_2_bfp.out1_21"
],
"const_args": [
"onnx::MatMul_1965"
],
"out_args": [
"/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_5"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"6144"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"6144"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"nonlinear": {
"type": "str",
"value": [
"Gelu"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/ff/net.2/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_5"
],
"const_args": [
"onnx::MatMul_1966"
],
"out_args": [
"/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_11_bfp.out25_7"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"6144"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"6144",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.0/Add_3",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_11_bfp.out25_7",
"/transformer_blocks.0/Add_output_0.out10_20"
],
"const_args": [
"transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_5_gma"
],
"out_args": [
"/transformer_blocks.0/Add_3_output_0.out10_21"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/norm1/norm/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.0/Add_3_output_0.out10_21"
],
"const_args": [
"/transformer_blocks.1/norm1/norm/Constant_output_0",
"/transformer_blocks.1/norm1/norm/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_4_bfp.out15_4"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/norm1/Add_2",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_4_bfp.out15_4"
],
"const_args": [
"transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_0_transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_1"
],
"out_args": [
"/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/controlnet_blocks.0/MatMul",
"type": "SDGemm",
"in_args": [
"/transformer_blocks.0/Add_3_output_0.out10_21"
],
"const_args": [
"onnx::MatMul_2088"
],
"out_args": [
"/controlnet_blocks.0/Add_output_0.out17_3_13"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/norm1_context/norm/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.0/Add_7_output_0.out10_1"
],
"const_args": [
"/transformer_blocks.1/norm1_context/norm/Constant_output_0",
"/transformer_blocks.1/norm1_context/norm/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_5_bfp.out15_5"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/norm1_context/Add_2",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_5_bfp.out15_5"
],
"const_args": [
"transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_0_transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_1"
],
"out_args": [
"/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/attn/Concat",
"type": "SDGemmConcat_bfp",
"in_args": [
"/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4",
"/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2"
],
"const_args": [
"onnx::MatMul_1972_onnx::MatMul_1969"
],
"out_args": [
"/transformer_blocks.1/attn/Concat_output_0.out22_1_bfp.out23_3"
],
"attrs": {
"trans_head": {
"type": "int",
"value": [
"1"
]
},
"head_num": {
"type": "int",
"value": [
"24"
]
},
"concat_axis": {
"type": "int",
"value": [
"1"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/attn/Concat_1",
"type": "SDGemmConcat_bfp",
"in_args": [
"/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4",
"/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2"
],
"const_args": [
"onnx::MatMul_1973_onnx::MatMul_1970"
],
"out_args": [
"/transformer_blocks.1/attn/Concat_1_output_0.out22_1_bfp.out23_4"
],
"attrs": {
"trans_head": {
"type": "int",
"value": [
"1"
]
},
"head_num": {
"type": "int",
"value": [
"24"
]
},
"concat_axis": {
"type": "int",
"value": [
"1"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/attn/Concat_2",
"type": "SDGemmConcat_bfp",
"in_args": [
"/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4",
"/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2"
],
"const_args": [
"onnx::MatMul_1974_onnx::MatMul_1971"
],
"out_args": [
"/transformer_blocks.1/attn/Concat_2_output_0.out22_1_bfp.out23_5"
],
"attrs": {
"head_num": {
"type": "int",
"value": [
"24"
]
},
"concat_axis": {
"type": "int",
"value": [
"1"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"trans_head": {
"type": "int",
"value": [
"3"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/attn/MatMulmha_18_0_1",
"type": "SDMHA_bfp",
"in_args": [
"/transformer_blocks.1/attn/Concat_output_0.out22_1_bfp.out23_3",
"/transformer_blocks.1/attn/Concat_1_output_0.out22_1_bfp.out23_4",
"/transformer_blocks.1/attn/Concat_2_output_0.out22_1_bfp.out23_5"
],
"const_args": [],
"out_args": [
"/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1"
],
"attrs": {
"num_heads": {
"type": "int",
"value": [
"24"
]
},
"unidirectional": {
"type": "int",
"value": [
"0"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"64",
"max_length + floor(h/2)*floor(w/2)"
]
},
"op_version": {
"type": "str",
"value": [
"v2"
]
},
"is_flash_mha": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/attn/to_out.0/MatMul",
"type": "SDSliceGemm_bfp",
"in_args": [
"/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1"
],
"const_args": [
"onnx::MatMul_1988"
],
"out_args": [
"/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2"
],
"attrs": {
"axes": {
"type": "int",
"value": [
"1"
]
},
"slice_shape": {
"type": "str",
"value": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"1536"
]
},
"start": {
"type": "int",
"value": [
"0"
]
},
"end": {
"type": "str",
"value": [
"floor(h/2)*floor(w/2)"
]
},
"step": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/Add",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2",
"/transformer_blocks.0/Add_3_output_0.out10_21"
],
"const_args": [
"transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_2_gma"
],
"out_args": [
"/transformer_blocks.1/Add_output_0.out10_2"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/norm2/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.1/Add_output_0.out10_2"
],
"const_args": [
"/transformer_blocks.1/norm2/Constant_output_0",
"/transformer_blocks.1/norm2/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/Add_2",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6"
],
"const_args": [
"transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_3_transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_4"
],
"out_args": [
"/transformer_blocks.1/Add_2_output_0.out0_0_6_bfp.out1_3"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/ff/net.0/proj/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.1/Add_2_output_0.out0_0_6_bfp.out1_3"
],
"const_args": [
"onnx::MatMul_1990"
],
"out_args": [
"/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_10"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"6144"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"6144"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"nonlinear": {
"type": "str",
"value": [
"Gelu"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/ff/net.2/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_10"
],
"const_args": [
"onnx::MatMul_1991"
],
"out_args": [
"/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_20_bfp.out25_12"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"6144"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"6144",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/Add_3",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_20_bfp.out25_12",
"/transformer_blocks.1/Add_output_0.out10_2"
],
"const_args": [
"transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_5_gma"
],
"out_args": [
"/transformer_blocks.1/Add_3_output_0.out10_3"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/controlnet_blocks.1/MatMul",
"type": "SDGemm",
"in_args": [
"/transformer_blocks.1/Add_3_output_0.out10_3"
],
"const_args": [
"onnx::MatMul_2089"
],
"out_args": [
"/controlnet_blocks.1/Add_output_0.out17_3_22"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/attn/to_add_out/MatMul",
"type": "SDSliceGemm_bfp",
"in_args": [
"/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1"
],
"const_args": [
"onnx::MatMul_1989"
],
"out_args": [
"/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3"
],
"attrs": {
"axes": {
"type": "int",
"value": [
"1"
]
},
"slice_shape": {
"type": "str",
"value": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"1536"
]
},
"start": {
"type": "str",
"value": [
"floor(h/2)*floor(w/2)"
]
},
"end": {
"type": "int",
"value": [
"2147483647"
]
},
"step": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/Add_4",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3",
"/transformer_blocks.0/Add_7_output_0.out10_1"
],
"const_args": [
"transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_2_gma"
],
"out_args": [
"/transformer_blocks.1/Add_4_output_0.out10_4"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/norm2_context/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.1/Add_4_output_0.out10_4"
],
"const_args": [
"/transformer_blocks.1/norm2_context/Constant_output_0",
"/transformer_blocks.1/norm2_context/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/Add_6",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7"
],
"const_args": [
"transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_3_transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_4"
],
"out_args": [
"/transformer_blocks.1/Add_6_output_0.out0_0_7_bfp.out1_5"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/ff_context/net.0/proj/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.1/Add_6_output_0.out0_0_7_bfp.out1_5"
],
"const_args": [
"onnx::MatMul_1992"
],
"out_args": [
"/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_11"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"6144"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"6144"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"nonlinear": {
"type": "str",
"value": [
"Gelu"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/ff_context/net.2/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_11"
],
"const_args": [
"onnx::MatMul_1993"
],
"out_args": [
"/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_21_bfp.out25_13"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"6144"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"6144",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.1/Add_7",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_21_bfp.out25_13",
"/transformer_blocks.1/Add_4_output_0.out10_4"
],
"const_args": [
"transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_5_gma"
],
"out_args": [
"/transformer_blocks.1/Add_7_output_0.out10_5"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/norm1/norm/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.1/Add_3_output_0.out10_3"
],
"const_args": [
"/transformer_blocks.2/norm1/norm/Constant_output_0",
"/transformer_blocks.2/norm1/norm/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_8_bfp.out15_8"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/norm1/Add_2",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_8_bfp.out15_8"
],
"const_args": [
"transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_0_transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_1"
],
"out_args": [
"/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_6"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/norm1_context/norm/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.1/Add_7_output_0.out10_5"
],
"const_args": [
"/transformer_blocks.2/norm1_context/norm/Constant_output_0",
"/transformer_blocks.2/norm1_context/norm/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_9_bfp.out15_9"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/norm1_context/Add_2",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_9_bfp.out15_9"
],
"const_args": [
"transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_0_transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_1"
],
"out_args": [
"/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_8"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/attn/Concat",
"type": "SDGemmConcat_bfp",
"in_args": [
"/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_8",
"/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_6"
],
"const_args": [
"onnx::MatMul_1997_onnx::MatMul_1994"
],
"out_args": [
"/transformer_blocks.2/attn/Concat_output_0.out22_2_bfp.out23_6"
],
"attrs": {
"trans_head": {
"type": "int",
"value": [
"1"
]
},
"head_num": {
"type": "int",
"value": [
"24"
]
},
"concat_axis": {
"type": "int",
"value": [
"1"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/attn/Concat_1",
"type": "SDGemmConcat_bfp",
"in_args": [
"/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_8",
"/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_6"
],
"const_args": [
"onnx::MatMul_1998_onnx::MatMul_1995"
],
"out_args": [
"/transformer_blocks.2/attn/Concat_1_output_0.out22_2_bfp.out23_7"
],
"attrs": {
"trans_head": {
"type": "int",
"value": [
"1"
]
},
"head_num": {
"type": "int",
"value": [
"24"
]
},
"concat_axis": {
"type": "int",
"value": [
"1"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/attn/Concat_2",
"type": "SDGemmConcat_bfp",
"in_args": [
"/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_8",
"/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_6"
],
"const_args": [
"onnx::MatMul_1999_onnx::MatMul_1996"
],
"out_args": [
"/transformer_blocks.2/attn/Concat_2_output_0.out22_2_bfp.out23_8"
],
"attrs": {
"head_num": {
"type": "int",
"value": [
"24"
]
},
"concat_axis": {
"type": "int",
"value": [
"1"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"trans_head": {
"type": "int",
"value": [
"3"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/attn/MatMulmha_18_0_2",
"type": "SDMHA_bfp",
"in_args": [
"/transformer_blocks.2/attn/Concat_output_0.out22_2_bfp.out23_6",
"/transformer_blocks.2/attn/Concat_1_output_0.out22_2_bfp.out23_7",
"/transformer_blocks.2/attn/Concat_2_output_0.out22_2_bfp.out23_8"
],
"const_args": [],
"out_args": [
"/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2"
],
"attrs": {
"num_heads": {
"type": "int",
"value": [
"24"
]
},
"unidirectional": {
"type": "int",
"value": [
"0"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"64",
"max_length + floor(h/2)*floor(w/2)"
]
},
"op_version": {
"type": "str",
"value": [
"v2"
]
},
"is_flash_mha": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/attn/to_out.0/MatMul",
"type": "SDSliceGemm_bfp",
"in_args": [
"/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2"
],
"const_args": [
"onnx::MatMul_2013"
],
"out_args": [
"/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4"
],
"attrs": {
"axes": {
"type": "int",
"value": [
"1"
]
},
"slice_shape": {
"type": "str",
"value": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"1536"
]
},
"start": {
"type": "int",
"value": [
"0"
]
},
"end": {
"type": "str",
"value": [
"floor(h/2)*floor(w/2)"
]
},
"step": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/Add",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4",
"/transformer_blocks.1/Add_3_output_0.out10_3"
],
"const_args": [
"transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_2_gma"
],
"out_args": [
"/transformer_blocks.2/Add_output_0.out10_6"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/norm2/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.2/Add_output_0.out10_6"
],
"const_args": [
"/transformer_blocks.2/norm2/Constant_output_0",
"/transformer_blocks.2/norm2/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/Add_2",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10"
],
"const_args": [
"transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_3_transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_4"
],
"out_args": [
"/transformer_blocks.2/Add_2_output_0.out0_0_10_bfp.out1_7"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/ff/net.0/proj/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.2/Add_2_output_0.out0_0_10_bfp.out1_7"
],
"const_args": [
"onnx::MatMul_2015"
],
"out_args": [
"/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_15"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"6144"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"6144"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"nonlinear": {
"type": "str",
"value": [
"Gelu"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/ff/net.2/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_15"
],
"const_args": [
"onnx::MatMul_2016"
],
"out_args": [
"/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_29_bfp.out25_17"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"6144"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"6144",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/Add_3",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_29_bfp.out25_17",
"/transformer_blocks.2/Add_output_0.out10_6"
],
"const_args": [
"transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_5_gma"
],
"out_args": [
"/transformer_blocks.2/Add_3_output_0.out10_7"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/controlnet_blocks.2/MatMul",
"type": "SDGemm",
"in_args": [
"/transformer_blocks.2/Add_3_output_0.out10_7"
],
"const_args": [
"onnx::MatMul_2090"
],
"out_args": [
"/controlnet_blocks.2/Add_output_0.out17_3_31"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/attn/to_add_out/MatMul",
"type": "SDSliceGemm_bfp",
"in_args": [
"/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2"
],
"const_args": [
"onnx::MatMul_2014"
],
"out_args": [
"/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5"
],
"attrs": {
"axes": {
"type": "int",
"value": [
"1"
]
},
"slice_shape": {
"type": "str",
"value": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"1536"
]
},
"start": {
"type": "str",
"value": [
"floor(h/2)*floor(w/2)"
]
},
"end": {
"type": "int",
"value": [
"2147483647"
]
},
"step": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/Add_4",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5",
"/transformer_blocks.1/Add_7_output_0.out10_5"
],
"const_args": [
"transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_2_gma"
],
"out_args": [
"/transformer_blocks.2/Add_4_output_0.out10_8"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/norm2_context/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.2/Add_4_output_0.out10_8"
],
"const_args": [
"/transformer_blocks.2/norm2_context/Constant_output_0",
"/transformer_blocks.2/norm2_context/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/Add_6",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11"
],
"const_args": [
"transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_3_transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_4"
],
"out_args": [
"/transformer_blocks.2/Add_6_output_0.out0_0_11_bfp.out1_9"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/ff_context/net.0/proj/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.2/Add_6_output_0.out0_0_11_bfp.out1_9"
],
"const_args": [
"onnx::MatMul_2017"
],
"out_args": [
"/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_16"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"6144"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"6144"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"nonlinear": {
"type": "str",
"value": [
"Gelu"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/ff_context/net.2/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_16"
],
"const_args": [
"onnx::MatMul_2018"
],
"out_args": [
"/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_30_bfp.out25_18"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"6144"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"6144",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.2/Add_7",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_30_bfp.out25_18",
"/transformer_blocks.2/Add_4_output_0.out10_8"
],
"const_args": [
"transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_5_gma"
],
"out_args": [
"/transformer_blocks.2/Add_7_output_0.out10_9"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/norm1/norm/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.2/Add_3_output_0.out10_7"
],
"const_args": [
"/transformer_blocks.3/norm1/norm/Constant_output_0",
"/transformer_blocks.3/norm1/norm/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_12_bfp.out15_12"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/norm1/Add_2",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_12_bfp.out15_12"
],
"const_args": [
"transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_0_transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_1"
],
"out_args": [
"/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_10"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/norm1_context/norm/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.2/Add_7_output_0.out10_9"
],
"const_args": [
"/transformer_blocks.3/norm1_context/norm/Constant_output_0",
"/transformer_blocks.3/norm1_context/norm/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_13_bfp.out15_13"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/norm1_context/Add_2",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_13_bfp.out15_13"
],
"const_args": [
"transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_0_transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_1"
],
"out_args": [
"/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_12"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/attn/Concat",
"type": "SDGemmConcat_bfp",
"in_args": [
"/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_12",
"/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_10"
],
"const_args": [
"onnx::MatMul_2022_onnx::MatMul_2019"
],
"out_args": [
"/transformer_blocks.3/attn/Concat_output_0.out22_3_bfp.out23_9"
],
"attrs": {
"trans_head": {
"type": "int",
"value": [
"1"
]
},
"head_num": {
"type": "int",
"value": [
"24"
]
},
"concat_axis": {
"type": "int",
"value": [
"1"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/attn/Concat_1",
"type": "SDGemmConcat_bfp",
"in_args": [
"/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_12",
"/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_10"
],
"const_args": [
"onnx::MatMul_2023_onnx::MatMul_2020"
],
"out_args": [
"/transformer_blocks.3/attn/Concat_1_output_0.out22_3_bfp.out23_10"
],
"attrs": {
"trans_head": {
"type": "int",
"value": [
"1"
]
},
"head_num": {
"type": "int",
"value": [
"24"
]
},
"concat_axis": {
"type": "int",
"value": [
"1"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/attn/Concat_2",
"type": "SDGemmConcat_bfp",
"in_args": [
"/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_12",
"/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_10"
],
"const_args": [
"onnx::MatMul_2024_onnx::MatMul_2021"
],
"out_args": [
"/transformer_blocks.3/attn/Concat_2_output_0.out22_3_bfp.out23_11"
],
"attrs": {
"head_num": {
"type": "int",
"value": [
"24"
]
},
"concat_axis": {
"type": "int",
"value": [
"1"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"trans_head": {
"type": "int",
"value": [
"3"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/attn/MatMulmha_18_0_3",
"type": "SDMHA_bfp",
"in_args": [
"/transformer_blocks.3/attn/Concat_output_0.out22_3_bfp.out23_9",
"/transformer_blocks.3/attn/Concat_1_output_0.out22_3_bfp.out23_10",
"/transformer_blocks.3/attn/Concat_2_output_0.out22_3_bfp.out23_11"
],
"const_args": [],
"out_args": [
"/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3"
],
"attrs": {
"num_heads": {
"type": "int",
"value": [
"24"
]
},
"unidirectional": {
"type": "int",
"value": [
"0"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"64",
"max_length + floor(h/2)*floor(w/2)"
]
},
"op_version": {
"type": "str",
"value": [
"v2"
]
},
"is_flash_mha": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/attn/to_out.0/MatMul",
"type": "SDSliceGemm_bfp",
"in_args": [
"/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3"
],
"const_args": [
"onnx::MatMul_2038"
],
"out_args": [
"/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6"
],
"attrs": {
"axes": {
"type": "int",
"value": [
"1"
]
},
"slice_shape": {
"type": "str",
"value": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"1536"
]
},
"start": {
"type": "int",
"value": [
"0"
]
},
"end": {
"type": "str",
"value": [
"floor(h/2)*floor(w/2)"
]
},
"step": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/Add",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6",
"/transformer_blocks.2/Add_3_output_0.out10_7"
],
"const_args": [
"transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_2_gma"
],
"out_args": [
"/transformer_blocks.3/Add_output_0.out10_10"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/norm2/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.3/Add_output_0.out10_10"
],
"const_args": [
"/transformer_blocks.3/norm2/Constant_output_0",
"/transformer_blocks.3/norm2/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/Add_2",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14"
],
"const_args": [
"transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_3_transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_4"
],
"out_args": [
"/transformer_blocks.3/Add_2_output_0.out0_0_14_bfp.out1_11"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/ff/net.0/proj/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.3/Add_2_output_0.out0_0_14_bfp.out1_11"
],
"const_args": [
"onnx::MatMul_2040"
],
"out_args": [
"/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_20"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"6144"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"6144"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"nonlinear": {
"type": "str",
"value": [
"Gelu"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/ff/net.2/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_20"
],
"const_args": [
"onnx::MatMul_2041"
],
"out_args": [
"/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_38_bfp.out25_22"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"6144"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"6144",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/Add_3",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_38_bfp.out25_22",
"/transformer_blocks.3/Add_output_0.out10_10"
],
"const_args": [
"transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_5_gma"
],
"out_args": [
"/transformer_blocks.3/Add_3_output_0.out10_11"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/controlnet_blocks.3/MatMul",
"type": "SDGemm",
"in_args": [
"/transformer_blocks.3/Add_3_output_0.out10_11"
],
"const_args": [
"onnx::MatMul_2091"
],
"out_args": [
"/controlnet_blocks.3/Add_output_0.out17_3_40"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/attn/to_add_out/MatMul",
"type": "SDSliceGemm_bfp",
"in_args": [
"/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3"
],
"const_args": [
"onnx::MatMul_2039"
],
"out_args": [
"/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7"
],
"attrs": {
"axes": {
"type": "int",
"value": [
"1"
]
},
"slice_shape": {
"type": "str",
"value": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"1536"
]
},
"start": {
"type": "str",
"value": [
"floor(h/2)*floor(w/2)"
]
},
"end": {
"type": "int",
"value": [
"2147483647"
]
},
"step": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/Add_4",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7",
"/transformer_blocks.2/Add_7_output_0.out10_9"
],
"const_args": [
"transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_2_gma"
],
"out_args": [
"/transformer_blocks.3/Add_4_output_0.out10_12"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/norm2_context/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.3/Add_4_output_0.out10_12"
],
"const_args": [
"/transformer_blocks.3/norm2_context/Constant_output_0",
"/transformer_blocks.3/norm2_context/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/Add_6",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15"
],
"const_args": [
"transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_3_transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_4"
],
"out_args": [
"/transformer_blocks.3/Add_6_output_0.out0_0_15_bfp.out1_13"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/ff_context/net.0/proj/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.3/Add_6_output_0.out0_0_15_bfp.out1_13"
],
"const_args": [
"onnx::MatMul_2042"
],
"out_args": [
"/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_21"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"6144"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"6144"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"nonlinear": {
"type": "str",
"value": [
"Gelu"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/ff_context/net.2/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_21"
],
"const_args": [
"onnx::MatMul_2043"
],
"out_args": [
"/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_39_bfp.out25_23"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"6144"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"6144",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.3/Add_7",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_39_bfp.out25_23",
"/transformer_blocks.3/Add_4_output_0.out10_12"
],
"const_args": [
"transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_5_gma"
],
"out_args": [
"/transformer_blocks.3/Add_7_output_0.out10_13"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/norm1/norm/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.3/Add_3_output_0.out10_11"
],
"const_args": [
"/transformer_blocks.4/norm1/norm/Constant_output_0",
"/transformer_blocks.4/norm1/norm/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_16_bfp.out15_16"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/norm1/Add_2",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_16_bfp.out15_16"
],
"const_args": [
"transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_0_transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_1"
],
"out_args": [
"/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_14"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/norm1_context/norm/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.3/Add_7_output_0.out10_13"
],
"const_args": [
"/transformer_blocks.4/norm1_context/norm/Constant_output_0",
"/transformer_blocks.4/norm1_context/norm/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_17_bfp.out15_17"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/norm1_context/Add_2",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_17_bfp.out15_17"
],
"const_args": [
"transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_0_transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_1"
],
"out_args": [
"/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_16"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/attn/Concat",
"type": "SDGemmConcat_bfp",
"in_args": [
"/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_16",
"/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_14"
],
"const_args": [
"onnx::MatMul_2047_onnx::MatMul_2044"
],
"out_args": [
"/transformer_blocks.4/attn/Concat_output_0.out22_4_bfp.out23_12"
],
"attrs": {
"trans_head": {
"type": "int",
"value": [
"1"
]
},
"head_num": {
"type": "int",
"value": [
"24"
]
},
"concat_axis": {
"type": "int",
"value": [
"1"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/attn/Concat_1",
"type": "SDGemmConcat_bfp",
"in_args": [
"/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_16",
"/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_14"
],
"const_args": [
"onnx::MatMul_2048_onnx::MatMul_2045"
],
"out_args": [
"/transformer_blocks.4/attn/Concat_1_output_0.out22_4_bfp.out23_13"
],
"attrs": {
"trans_head": {
"type": "int",
"value": [
"1"
]
},
"head_num": {
"type": "int",
"value": [
"24"
]
},
"concat_axis": {
"type": "int",
"value": [
"1"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/attn/Concat_2",
"type": "SDGemmConcat_bfp",
"in_args": [
"/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_16",
"/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_14"
],
"const_args": [
"onnx::MatMul_2049_onnx::MatMul_2046"
],
"out_args": [
"/transformer_blocks.4/attn/Concat_2_output_0.out22_4_bfp.out23_14"
],
"attrs": {
"head_num": {
"type": "int",
"value": [
"24"
]
},
"concat_axis": {
"type": "int",
"value": [
"1"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"trans_head": {
"type": "int",
"value": [
"3"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/attn/MatMulmha_18_0_4",
"type": "SDMHA_bfp",
"in_args": [
"/transformer_blocks.4/attn/Concat_output_0.out22_4_bfp.out23_12",
"/transformer_blocks.4/attn/Concat_1_output_0.out22_4_bfp.out23_13",
"/transformer_blocks.4/attn/Concat_2_output_0.out22_4_bfp.out23_14"
],
"const_args": [],
"out_args": [
"/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4"
],
"attrs": {
"num_heads": {
"type": "int",
"value": [
"24"
]
},
"unidirectional": {
"type": "int",
"value": [
"0"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"64",
"max_length + floor(h/2)*floor(w/2)"
]
},
"op_version": {
"type": "str",
"value": [
"v2"
]
},
"is_flash_mha": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/attn/to_out.0/MatMul",
"type": "SDSliceGemm_bfp",
"in_args": [
"/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4"
],
"const_args": [
"onnx::MatMul_2063"
],
"out_args": [
"/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8"
],
"attrs": {
"axes": {
"type": "int",
"value": [
"1"
]
},
"slice_shape": {
"type": "str",
"value": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"1536"
]
},
"start": {
"type": "int",
"value": [
"0"
]
},
"end": {
"type": "str",
"value": [
"floor(h/2)*floor(w/2)"
]
},
"step": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/Add",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8",
"/transformer_blocks.3/Add_3_output_0.out10_11"
],
"const_args": [
"transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_2_gma"
],
"out_args": [
"/transformer_blocks.4/Add_output_0.out10_14"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/norm2/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.4/Add_output_0.out10_14"
],
"const_args": [
"/transformer_blocks.4/norm2/Constant_output_0",
"/transformer_blocks.4/norm2/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/Add_2",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18"
],
"const_args": [
"transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_3_transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_4"
],
"out_args": [
"/transformer_blocks.4/Add_2_output_0.out0_0_18_bfp.out1_15"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/ff/net.0/proj/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.4/Add_2_output_0.out0_0_18_bfp.out1_15"
],
"const_args": [
"onnx::MatMul_2065"
],
"out_args": [
"/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_25"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"6144"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"6144"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"nonlinear": {
"type": "str",
"value": [
"Gelu"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/ff/net.2/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_25"
],
"const_args": [
"onnx::MatMul_2066"
],
"out_args": [
"/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_47_bfp.out25_27"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"6144"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"6144",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/Add_3",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_47_bfp.out25_27",
"/transformer_blocks.4/Add_output_0.out10_14"
],
"const_args": [
"transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_5_gma"
],
"out_args": [
"/transformer_blocks.4/Add_3_output_0.out10_15"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/controlnet_blocks.4/MatMul",
"type": "SDGemm",
"in_args": [
"/transformer_blocks.4/Add_3_output_0.out10_15"
],
"const_args": [
"onnx::MatMul_2092"
],
"out_args": [
"/controlnet_blocks.4/Add_output_0.out17_3_49"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/attn/to_add_out/MatMul",
"type": "SDSliceGemm_bfp",
"in_args": [
"/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4"
],
"const_args": [
"onnx::MatMul_2064"
],
"out_args": [
"/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9"
],
"attrs": {
"axes": {
"type": "int",
"value": [
"1"
]
},
"slice_shape": {
"type": "str",
"value": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"1536"
]
},
"start": {
"type": "str",
"value": [
"floor(h/2)*floor(w/2)"
]
},
"end": {
"type": "int",
"value": [
"2147483647"
]
},
"step": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/Add_4",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9",
"/transformer_blocks.3/Add_7_output_0.out10_13"
],
"const_args": [
"transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_2_gma"
],
"out_args": [
"/transformer_blocks.4/Add_4_output_0.out10_16"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/norm2_context/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.4/Add_4_output_0.out10_16"
],
"const_args": [
"/transformer_blocks.4/norm2_context/Constant_output_0",
"/transformer_blocks.4/norm2_context/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/Add_6",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19"
],
"const_args": [
"transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_3_transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_4"
],
"out_args": [
"/transformer_blocks.4/Add_6_output_0.out0_0_19_bfp.out1_17"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/ff_context/net.0/proj/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.4/Add_6_output_0.out0_0_19_bfp.out1_17"
],
"const_args": [
"onnx::MatMul_2067"
],
"out_args": [
"/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_26"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"6144"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"6144"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"nonlinear": {
"type": "str",
"value": [
"Gelu"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/ff_context/net.2/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_26"
],
"const_args": [
"onnx::MatMul_2068"
],
"out_args": [
"/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_48_bfp.out25_28"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"6144"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"6144",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.4/Add_7",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_48_bfp.out25_28",
"/transformer_blocks.4/Add_4_output_0.out10_16"
],
"const_args": [
"transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_5_gma"
],
"out_args": [
"/transformer_blocks.4/Add_7_output_0.out10_17"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.5/norm1/norm/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.4/Add_3_output_0.out10_15"
],
"const_args": [
"/transformer_blocks.5/norm1/norm/Constant_output_0",
"/transformer_blocks.5/norm1/norm/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_20_bfp.out15_20"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.5/norm1/Add_2",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_20_bfp.out15_20"
],
"const_args": [
"transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_0_transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_1"
],
"out_args": [
"/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_18"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.5/norm1_context/norm/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.4/Add_7_output_0.out10_17"
],
"const_args": [
"/transformer_blocks.5/norm1_context/norm/Constant_output_0",
"/transformer_blocks.5/norm1_context/norm/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_21_bfp.out15_21"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.5/norm1_context/Add_2",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_21_bfp.out15_21"
],
"const_args": [
"transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_0_transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_1"
],
"out_args": [
"/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_20"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.5/attn/Concat",
"type": "SDGemmConcat_bfp",
"in_args": [
"/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_20",
"/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_18"
],
"const_args": [
"onnx::MatMul_2072_onnx::MatMul_2069"
],
"out_args": [
"/transformer_blocks.5/attn/Concat_output_0.out22_5_bfp.out23_15"
],
"attrs": {
"trans_head": {
"type": "int",
"value": [
"1"
]
},
"head_num": {
"type": "int",
"value": [
"24"
]
},
"concat_axis": {
"type": "int",
"value": [
"1"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.5/attn/Concat_1",
"type": "SDGemmConcat_bfp",
"in_args": [
"/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_20",
"/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_18"
],
"const_args": [
"onnx::MatMul_2073_onnx::MatMul_2070"
],
"out_args": [
"/transformer_blocks.5/attn/Concat_1_output_0.out22_5_bfp.out23_16"
],
"attrs": {
"trans_head": {
"type": "int",
"value": [
"1"
]
},
"head_num": {
"type": "int",
"value": [
"24"
]
},
"concat_axis": {
"type": "int",
"value": [
"1"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.5/attn/Concat_2",
"type": "SDGemmConcat_bfp",
"in_args": [
"/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_20",
"/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_18"
],
"const_args": [
"onnx::MatMul_2074_onnx::MatMul_2071"
],
"out_args": [
"/transformer_blocks.5/attn/Concat_2_output_0.out22_5_bfp.out23_17"
],
"attrs": {
"head_num": {
"type": "int",
"value": [
"24"
]
},
"concat_axis": {
"type": "int",
"value": [
"1"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape_0": {
"type": "str",
"value": [
"batch_size",
"max_length",
"1536"
]
},
"output_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"trans_head": {
"type": "int",
"value": [
"3"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.5/attn/MatMulmha_18_0_5",
"type": "SDMHA_bfp",
"in_args": [
"/transformer_blocks.5/attn/Concat_output_0.out22_5_bfp.out23_15",
"/transformer_blocks.5/attn/Concat_1_output_0.out22_5_bfp.out23_16",
"/transformer_blocks.5/attn/Concat_2_output_0.out22_5_bfp.out23_17"
],
"const_args": [],
"out_args": [
"/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5"
],
"attrs": {
"num_heads": {
"type": "int",
"value": [
"24"
]
},
"unidirectional": {
"type": "int",
"value": [
"0"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"64",
"max_length + floor(h/2)*floor(w/2)"
]
},
"op_version": {
"type": "str",
"value": [
"v2"
]
},
"is_flash_mha": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.5/attn/to_out.0/MatMul",
"type": "SDSliceGemm_bfp",
"in_args": [
"/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5"
],
"const_args": [
"onnx::MatMul_2085"
],
"out_args": [
"/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10"
],
"attrs": {
"axes": {
"type": "int",
"value": [
"1"
]
},
"slice_shape": {
"type": "str",
"value": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"1536"
]
},
"start": {
"type": "int",
"value": [
"0"
]
},
"end": {
"type": "str",
"value": [
"floor(h/2)*floor(w/2)"
]
},
"step": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.5/Add",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10",
"/transformer_blocks.4/Add_3_output_0.out10_15"
],
"const_args": [
"transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_2_gma"
],
"out_args": [
"/transformer_blocks.5/Add_output_0.out10_18"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.5/norm2/LayerNormalization",
"type": "SDLayerNorm_bfbfp",
"in_args": [
"/transformer_blocks.5/Add_output_0.out10_18"
],
"const_args": [
"/transformer_blocks.5/norm2/Constant_output_0",
"/transformer_blocks.5/norm2/Constant_1_output_0"
],
"out_args": [
"/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22"
],
"attrs": {
"axis": {
"type": "int",
"value": [
"-1"
]
},
"epsilon": {
"type": "float",
"value": [
"9.999999974752427e-07"
]
},
"stash_type": {
"type": "int",
"value": [
"1"
]
},
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"gamma_shape": {
"type": "int",
"value": [
"1536"
]
},
"beta_shape": {
"type": "int",
"value": [
"1536"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfloat16"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.5/Add_2",
"type": "SDGemmGemmMulAdd_bfp",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22"
],
"const_args": [
"transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_3_transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_4"
],
"out_args": [
"/transformer_blocks.5/Add_2_output_0.out0_0_22_bfp.out1_19"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"weight_shape_1": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.5/ff/net.0/proj/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.5/Add_2_output_0.out0_0_22_bfp.out1_19"
],
"const_args": [
"onnx::MatMul_2086"
],
"out_args": [
"/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_30"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"6144"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"6144"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"nonlinear": {
"type": "str",
"value": [
"Gelu"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.5/ff/net.2/MatMul",
"type": "SDGemm_bfp",
"in_args": [
"/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_30"
],
"const_args": [
"onnx::MatMul_2087"
],
"out_args": [
"/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_56_bfp.out25_31"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"6144"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"6144",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfp16ebs8"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfp16ebs8",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/transformer_blocks.5/Add_3",
"type": "SDGemmMulAdd_bfpbfbf",
"in_args": [
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_56_bfp.out25_31",
"/transformer_blocks.5/Add_output_0.out10_18"
],
"const_args": [
"transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_5_gma"
],
"out_args": [
"/transformer_blocks.5/Add_3_output_0.out10_19"
],
"attrs": {
"input_shape_0": {
"type": "str",
"value": [
"batch_size",
"1",
"1536"
]
},
"input_shape_1": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"input_shape_2": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape_0": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16",
"bfp16ebs8"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
},
{
"name": "/controlnet_blocks.5/MatMul",
"type": "SDGemm",
"in_args": [
"/transformer_blocks.5/Add_3_output_0.out10_19"
],
"const_args": [
"onnx::MatMul_2093"
],
"out_args": [
"/controlnet_blocks.5/Add_output_0.out17_3_57"
],
"attrs": {
"input_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"output_shape": {
"type": "str",
"value": [
"batch_size",
"floor(h/2)*floor(w/2)",
"1536"
]
},
"weight_shape": {
"type": "int",
"value": [
"1536",
"1536"
]
},
"out_dtypes": {
"type": "str",
"value": [
"bfloat16"
]
},
"bias_enable": {
"type": "int",
"value": [
"1"
]
},
"in_dtypes": {
"type": "str",
"value": [
"bfloat16",
"bfp16ebs8",
"bfloat16"
]
},
"ctrl_packet": {
"type": "int",
"value": [
"1"
]
}
}
}
],
"fused_tensors": {
"in": {
"buffer_size": 15936,
"xrt_arg_id": 0,
"packed_tensors": [
"hidden_states_nhwc.out5_0_0",
"/pos_embed/Reshape_1_output_0.out_35_1_2",
"controlnet_cond_nhwc.out5_0_1",
"/time_text_embed/Cast_output_0.out17_3_3",
"pooled_projections.out17_3_1",
"encoder_hidden_states.out17_3_0"
]
},
"out": {
"buffer_size": 18432,
"xrt_arg_id": 1,
"packed_tensors": [
"/controlnet_blocks.0/Add_output_0.out17_3_13",
"/controlnet_blocks.1/Add_output_0.out17_3_22",
"/controlnet_blocks.2/Add_output_0.out17_3_31",
"/controlnet_blocks.3/Add_output_0.out17_3_40",
"/controlnet_blocks.4/Add_output_0.out17_3_49",
"/controlnet_blocks.5/Add_output_0.out17_3_57"
]
},
"scratch": {
"buffer_size": 322048,
"xrt_arg_id": 2,
"packed_tensors": [
"/pos_embed/Transpose_output_0.out5_0_0",
"/pos_embed/Add_2_output_0.out_35_1_2",
"/pos_embed_input/Transpose_output_0.out5_0_1",
"/Add_output_0.out_35_1_3",
"/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3",
"/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1",
"/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4",
"/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1",
"/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0",
"/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2",
"/time_text_embed/Add_output_0.out_35_1_4",
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2",
"encoder_hidden_states.out17_3_0_bfp.out25_0",
"/context_embedder/Add_output_0.out17_3_0_bfp.out25_0",
"/context_embedder/Add_output_0.out17_3_0",
"/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0",
"/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0",
"/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1",
"/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_22",
"/transformer_blocks.0/attn/Concat_output_0.out22_0_bfp.out23_0",
"/transformer_blocks.0/attn/Concat_1_output_0.out22_0_bfp.out23_1",
"/transformer_blocks.0/attn/Concat_2_output_0.out22_0_bfp.out23_2",
"/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0",
"/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1",
"/transformer_blocks.0/Add_4_output_0.out10_0",
"/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3",
"/transformer_blocks.0/Add_6_output_0.out0_0_3_bfp.out1_1",
"/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0",
"/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_6",
"/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_12_bfp.out25_8",
"/transformer_blocks.0/Add_7_output_0.out10_1",
"/transformer_blocks.0/Add_output_0.out10_20",
"/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2",
"/transformer_blocks.0/Add_2_output_0.out0_0_2_bfp.out1_21",
"/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_5",
"/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_11_bfp.out25_7",
"/transformer_blocks.0/Add_3_output_0.out10_21",
"/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_4_bfp.out15_4",
"/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2",
"/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_5_bfp.out15_5",
"/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4",
"/transformer_blocks.1/attn/Concat_output_0.out22_1_bfp.out23_3",
"/transformer_blocks.1/attn/Concat_1_output_0.out22_1_bfp.out23_4",
"/transformer_blocks.1/attn/Concat_2_output_0.out22_1_bfp.out23_5",
"/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1",
"/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2",
"/transformer_blocks.1/Add_output_0.out10_2",
"/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6",
"/transformer_blocks.1/Add_2_output_0.out0_0_6_bfp.out1_3",
"/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_10",
"/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_20_bfp.out25_12",
"/transformer_blocks.1/Add_3_output_0.out10_3",
"/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3",
"/transformer_blocks.1/Add_4_output_0.out10_4",
"/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7",
"/transformer_blocks.1/Add_6_output_0.out0_0_7_bfp.out1_5",
"/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_11",
"/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_21_bfp.out25_13",
"/transformer_blocks.1/Add_7_output_0.out10_5",
"/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_8_bfp.out15_8",
"/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_6",
"/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_9_bfp.out15_9",
"/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_8",
"/transformer_blocks.2/attn/Concat_output_0.out22_2_bfp.out23_6",
"/transformer_blocks.2/attn/Concat_1_output_0.out22_2_bfp.out23_7",
"/transformer_blocks.2/attn/Concat_2_output_0.out22_2_bfp.out23_8",
"/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2",
"/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4",
"/transformer_blocks.2/Add_output_0.out10_6",
"/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10",
"/transformer_blocks.2/Add_2_output_0.out0_0_10_bfp.out1_7",
"/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_15",
"/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_29_bfp.out25_17",
"/transformer_blocks.2/Add_3_output_0.out10_7",
"/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5",
"/transformer_blocks.2/Add_4_output_0.out10_8",
"/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11",
"/transformer_blocks.2/Add_6_output_0.out0_0_11_bfp.out1_9",
"/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_16",
"/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_30_bfp.out25_18",
"/transformer_blocks.2/Add_7_output_0.out10_9",
"/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_12_bfp.out15_12",
"/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_10",
"/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_13_bfp.out15_13",
"/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_12",
"/transformer_blocks.3/attn/Concat_output_0.out22_3_bfp.out23_9",
"/transformer_blocks.3/attn/Concat_1_output_0.out22_3_bfp.out23_10",
"/transformer_blocks.3/attn/Concat_2_output_0.out22_3_bfp.out23_11",
"/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3",
"/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6",
"/transformer_blocks.3/Add_output_0.out10_10",
"/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14",
"/transformer_blocks.3/Add_2_output_0.out0_0_14_bfp.out1_11",
"/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_20",
"/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_38_bfp.out25_22",
"/transformer_blocks.3/Add_3_output_0.out10_11",
"/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7",
"/transformer_blocks.3/Add_4_output_0.out10_12",
"/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15",
"/transformer_blocks.3/Add_6_output_0.out0_0_15_bfp.out1_13",
"/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_21",
"/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_39_bfp.out25_23",
"/transformer_blocks.3/Add_7_output_0.out10_13",
"/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_16_bfp.out15_16",
"/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_14",
"/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_17_bfp.out15_17",
"/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_16",
"/transformer_blocks.4/attn/Concat_output_0.out22_4_bfp.out23_12",
"/transformer_blocks.4/attn/Concat_1_output_0.out22_4_bfp.out23_13",
"/transformer_blocks.4/attn/Concat_2_output_0.out22_4_bfp.out23_14",
"/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4",
"/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8",
"/transformer_blocks.4/Add_output_0.out10_14",
"/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18",
"/transformer_blocks.4/Add_2_output_0.out0_0_18_bfp.out1_15",
"/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_25",
"/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_47_bfp.out25_27",
"/transformer_blocks.4/Add_3_output_0.out10_15",
"/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9",
"/transformer_blocks.4/Add_4_output_0.out10_16",
"/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19",
"/transformer_blocks.4/Add_6_output_0.out0_0_19_bfp.out1_17",
"/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_26",
"/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_48_bfp.out25_28",
"/transformer_blocks.4/Add_7_output_0.out10_17",
"/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_20_bfp.out15_20",
"/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_18",
"/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_21_bfp.out15_21",
"/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_20",
"/transformer_blocks.5/attn/Concat_output_0.out22_5_bfp.out23_15",
"/transformer_blocks.5/attn/Concat_1_output_0.out22_5_bfp.out23_16",
"/transformer_blocks.5/attn/Concat_2_output_0.out22_5_bfp.out23_17",
"/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5",
"/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10",
"/transformer_blocks.5/Add_output_0.out10_18",
"/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22",
"/transformer_blocks.5/Add_2_output_0.out0_0_22_bfp.out1_19",
"/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_30",
"/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_56_bfp.out25_31",
"/transformer_blocks.5/Add_3_output_0.out10_19"
]
},
"const": {
"buffer_size": 579539968,
"xrt_arg_id": 3,
"packed_tensors": [
"pos_embed.proj.weight",
"pos_embed_input.proj.weight",
"time_text_embed.timestep_embedder.linear_1.weight_5_1_2",
"/time_text_embed/timestep_embedder/act/Sigmoid.weights2_1",
"time_text_embed.timestep_embedder.linear_2.weight_5_1_3",
"time_text_embed.text_embedder.linear_1.weight_5_1_0",
"/time_text_embed/text_embedder/act_1/Sigmoid.weights2_0",
"time_text_embed.text_embedder.linear_2.weight_5_1_1",
"/transformer_blocks.0/norm1/silu/Sigmoid.weights2_2",
"encoder_hidden_states.out17_3_0_bfp.wts",
"onnx::MatMul_1943",
"/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_bfp.wts",
"/transformer_blocks.0/norm1_context/norm/Constant_output_0",
"/transformer_blocks.0/norm1_context/norm/Constant_1_output_0",
"transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_1_transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_0",
"/transformer_blocks.0/norm1/norm/Constant_output_0",
"/transformer_blocks.0/norm1/norm/Constant_1_output_0",
"transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_4_transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_3",
"onnx::MatMul_1947_onnx::MatMul_1944",
"onnx::MatMul_1948_onnx::MatMul_1945",
"onnx::MatMul_1949_onnx::MatMul_1946",
"onnx::MatMul_1964",
"transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_4_gma",
"/transformer_blocks.0/norm2_context/Constant_output_0",
"/transformer_blocks.0/norm2_context/Constant_1_output_0",
"transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_3_transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_2",
"onnx::MatMul_1963",
"onnx::MatMul_1967",
"onnx::MatMul_1968",
"transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_5_gma",
"transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_2_gma",
"/transformer_blocks.0/norm2/Constant_output_0",
"/transformer_blocks.0/norm2/Constant_1_output_0",
"transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_1_transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_0",
"onnx::MatMul_1965",
"onnx::MatMul_1966",
"transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_5_gma",
"/transformer_blocks.1/norm1/norm/Constant_output_0",
"/transformer_blocks.1/norm1/norm/Constant_1_output_0",
"transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_0_transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_1",
"onnx::MatMul_2088",
"/transformer_blocks.1/norm1_context/norm/Constant_output_0",
"/transformer_blocks.1/norm1_context/norm/Constant_1_output_0",
"transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_0_transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_1",
"onnx::MatMul_1972_onnx::MatMul_1969",
"onnx::MatMul_1973_onnx::MatMul_1970",
"onnx::MatMul_1974_onnx::MatMul_1971",
"onnx::MatMul_1988",
"transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_2_gma",
"/transformer_blocks.1/norm2/Constant_output_0",
"/transformer_blocks.1/norm2/Constant_1_output_0",
"transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_3_transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_4",
"onnx::MatMul_1990",
"onnx::MatMul_1991",
"transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_5_gma",
"onnx::MatMul_2089",
"onnx::MatMul_1989",
"transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_2_gma",
"/transformer_blocks.1/norm2_context/Constant_output_0",
"/transformer_blocks.1/norm2_context/Constant_1_output_0",
"transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_3_transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_4",
"onnx::MatMul_1992",
"onnx::MatMul_1993",
"transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_5_gma",
"/transformer_blocks.2/norm1/norm/Constant_output_0",
"/transformer_blocks.2/norm1/norm/Constant_1_output_0",
"transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_0_transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_1",
"/transformer_blocks.2/norm1_context/norm/Constant_output_0",
"/transformer_blocks.2/norm1_context/norm/Constant_1_output_0",
"transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_0_transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_1",
"onnx::MatMul_1997_onnx::MatMul_1994",
"onnx::MatMul_1998_onnx::MatMul_1995",
"onnx::MatMul_1999_onnx::MatMul_1996",
"onnx::MatMul_2013",
"transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_2_gma",
"/transformer_blocks.2/norm2/Constant_output_0",
"/transformer_blocks.2/norm2/Constant_1_output_0",
"transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_3_transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_4",
"onnx::MatMul_2015",
"onnx::MatMul_2016",
"transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_5_gma",
"onnx::MatMul_2090",
"onnx::MatMul_2014",
"transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_2_gma",
"/transformer_blocks.2/norm2_context/Constant_output_0",
"/transformer_blocks.2/norm2_context/Constant_1_output_0",
"transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_3_transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_4",
"onnx::MatMul_2017",
"onnx::MatMul_2018",
"transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_5_gma",
"/transformer_blocks.3/norm1/norm/Constant_output_0",
"/transformer_blocks.3/norm1/norm/Constant_1_output_0",
"transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_0_transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_1",
"/transformer_blocks.3/norm1_context/norm/Constant_output_0",
"/transformer_blocks.3/norm1_context/norm/Constant_1_output_0",
"transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_0_transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_1",
"onnx::MatMul_2022_onnx::MatMul_2019",
"onnx::MatMul_2023_onnx::MatMul_2020",
"onnx::MatMul_2024_onnx::MatMul_2021",
"onnx::MatMul_2038",
"transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_2_gma",
"/transformer_blocks.3/norm2/Constant_output_0",
"/transformer_blocks.3/norm2/Constant_1_output_0",
"transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_3_transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_4",
"onnx::MatMul_2040",
"onnx::MatMul_2041",
"transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_5_gma",
"onnx::MatMul_2091",
"onnx::MatMul_2039",
"transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_2_gma",
"/transformer_blocks.3/norm2_context/Constant_output_0",
"/transformer_blocks.3/norm2_context/Constant_1_output_0",
"transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_3_transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_4",
"onnx::MatMul_2042",
"onnx::MatMul_2043",
"transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_5_gma",
"/transformer_blocks.4/norm1/norm/Constant_output_0",
"/transformer_blocks.4/norm1/norm/Constant_1_output_0",
"transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_0_transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_1",
"/transformer_blocks.4/norm1_context/norm/Constant_output_0",
"/transformer_blocks.4/norm1_context/norm/Constant_1_output_0",
"transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_0_transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_1",
"onnx::MatMul_2047_onnx::MatMul_2044",
"onnx::MatMul_2048_onnx::MatMul_2045",
"onnx::MatMul_2049_onnx::MatMul_2046",
"onnx::MatMul_2063",
"transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_2_gma",
"/transformer_blocks.4/norm2/Constant_output_0",
"/transformer_blocks.4/norm2/Constant_1_output_0",
"transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_3_transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_4",
"onnx::MatMul_2065",
"onnx::MatMul_2066",
"transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_5_gma",
"onnx::MatMul_2092",
"onnx::MatMul_2064",
"transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_2_gma",
"/transformer_blocks.4/norm2_context/Constant_output_0",
"/transformer_blocks.4/norm2_context/Constant_1_output_0",
"transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_3_transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_4",
"onnx::MatMul_2067",
"onnx::MatMul_2068",
"transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_5_gma",
"/transformer_blocks.5/norm1/norm/Constant_output_0",
"/transformer_blocks.5/norm1/norm/Constant_1_output_0",
"transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_0_transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_1",
"/transformer_blocks.5/norm1_context/norm/Constant_output_0",
"/transformer_blocks.5/norm1_context/norm/Constant_1_output_0",
"transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_0_transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_1",
"onnx::MatMul_2072_onnx::MatMul_2069",
"onnx::MatMul_2073_onnx::MatMul_2070",
"onnx::MatMul_2074_onnx::MatMul_2071",
"onnx::MatMul_2085",
"transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_2_gma",
"/transformer_blocks.5/norm2/Constant_output_0",
"/transformer_blocks.5/norm2/Constant_1_output_0",
"transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_3_transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_4",
"onnx::MatMul_2086",
"onnx::MatMul_2087",
"transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_5_gma",
"onnx::MatMul_2093"
]
},
"super_instr": {
"buffer_size": 0,
"xrt_arg_id": 4,
"packed_tensors": []
}
},
"tensor_map": {
"hidden_states_nhwc.out5_0_0": {
"packed_buffer_label": "in",
"xrt_arg_id": 0,
"dtype": "bfloat16",
"shape": [
1,
1,
1,
16
],
"size_in_bytes": 32,
"op_tensor_size": 32,
"dynamic_shapes": [
"batch_size",
"w",
"h",
"False"
],
"offset": 0
},
"/pos_embed/Reshape_1_output_0.out_35_1_2": {
"packed_buffer_label": "in",
"xrt_arg_id": 0,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"False",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 32
},
"controlnet_cond_nhwc.out5_0_1": {
"packed_buffer_label": "in",
"xrt_arg_id": 0,
"dtype": "bfloat16",
"shape": [
1,
1,
1,
16
],
"size_in_bytes": 32,
"op_tensor_size": 32,
"dynamic_shapes": [
"batch_size",
"w",
"h",
"False"
],
"offset": 3104
},
"/time_text_embed/Cast_output_0.out17_3_3": {
"packed_buffer_label": "in",
"xrt_arg_id": 0,
"dtype": "bfloat16",
"shape": [
1,
256
],
"size_in_bytes": 512,
"op_tensor_size": 512,
"dynamic_shapes": [
"batch_size",
"False"
],
"offset": 3136
},
"pooled_projections.out17_3_1": {
"packed_buffer_label": "in",
"xrt_arg_id": 0,
"dtype": "bfloat16",
"shape": [
1,
2048
],
"size_in_bytes": 4096,
"op_tensor_size": 4096,
"dynamic_shapes": [
"batch_size",
"False"
],
"offset": 3648
},
"encoder_hidden_states.out17_3_0": {
"packed_buffer_label": "in",
"xrt_arg_id": 0,
"dtype": "bfloat16",
"shape": [
1,
1,
4096
],
"size_in_bytes": 8192,
"op_tensor_size": 8192,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 7744
},
"/controlnet_blocks.0/Add_output_0.out17_3_13": {
"packed_buffer_label": "out",
"xrt_arg_id": 1,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 0
},
"/controlnet_blocks.1/Add_output_0.out17_3_22": {
"packed_buffer_label": "out",
"xrt_arg_id": 1,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 3072
},
"/controlnet_blocks.2/Add_output_0.out17_3_31": {
"packed_buffer_label": "out",
"xrt_arg_id": 1,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 6144
},
"/controlnet_blocks.3/Add_output_0.out17_3_40": {
"packed_buffer_label": "out",
"xrt_arg_id": 1,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 9216
},
"/controlnet_blocks.4/Add_output_0.out17_3_49": {
"packed_buffer_label": "out",
"xrt_arg_id": 1,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 12288
},
"/controlnet_blocks.5/Add_output_0.out17_3_57": {
"packed_buffer_label": "out",
"xrt_arg_id": 1,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 15360
},
"/pos_embed/Transpose_output_0.out5_0_0": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 0
},
"/pos_embed/Add_2_output_0.out_35_1_2": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 3072
},
"/pos_embed_input/Transpose_output_0.out5_0_1": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 6144
},
"/Add_output_0.out_35_1_3": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 9216
},
"/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"False"
],
"offset": 12288
},
"/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"False"
],
"offset": 15360
},
"/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"False"
],
"offset": 18432
},
"/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"False"
],
"offset": 21504
},
"/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"False"
],
"offset": 24576
},
"/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"False"
],
"offset": 27648
},
"/time_text_embed/Add_output_0.out_35_1_4": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"False"
],
"offset": 30720
},
"/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"False"
],
"offset": 33792
},
"encoder_hidden_states.out17_3_0_bfp.out25_0": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
4096
],
"size_in_bytes": 4096,
"op_tensor_size": 4096,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 36864
},
"/context_embedder/Add_output_0.out17_3_0_bfp.out25_0": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 40960
},
"/context_embedder/Add_output_0.out17_3_0": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 42496
},
"/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 45568
},
"/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 47104
},
"/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 48640
},
"/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_22": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 50176
},
"/transformer_blocks.0/attn/Concat_output_0.out22_0_bfp.out23_0": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
24,
1,
64
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"False",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 51712
},
"/transformer_blocks.0/attn/Concat_1_output_0.out22_0_bfp.out23_1": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
24,
1,
64
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"False",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 53248
},
"/transformer_blocks.0/attn/Concat_2_output_0.out22_0_bfp.out23_2": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
24,
1,
64
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"False",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 54784
},
"/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 56320
},
"/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 57856
},
"/transformer_blocks.0/Add_4_output_0.out10_0": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 59392
},
"/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 62464
},
"/transformer_blocks.0/Add_6_output_0.out0_0_3_bfp.out1_1": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 64000
},
"/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 65536
},
"/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_6": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
6144
],
"size_in_bytes": 6144,
"op_tensor_size": 6144,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 67072
},
"/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_12_bfp.out25_8": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 73216
},
"/transformer_blocks.0/Add_7_output_0.out10_1": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 74752
},
"/transformer_blocks.0/Add_output_0.out10_20": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 77824
},
"/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 80896
},
"/transformer_blocks.0/Add_2_output_0.out0_0_2_bfp.out1_21": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 82432
},
"/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_5": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
6144
],
"size_in_bytes": 6144,
"op_tensor_size": 6144,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 83968
},
"/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_11_bfp.out25_7": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 90112
},
"/transformer_blocks.0/Add_3_output_0.out10_21": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 91648
},
"/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_4_bfp.out15_4": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 94720
},
"/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 96256
},
"/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_5_bfp.out15_5": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 97792
},
"/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 99328
},
"/transformer_blocks.1/attn/Concat_output_0.out22_1_bfp.out23_3": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
24,
1,
64
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"False",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 100864
},
"/transformer_blocks.1/attn/Concat_1_output_0.out22_1_bfp.out23_4": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
24,
1,
64
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"False",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 102400
},
"/transformer_blocks.1/attn/Concat_2_output_0.out22_1_bfp.out23_5": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
24,
1,
64
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"False",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 103936
},
"/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 105472
},
"/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 107008
},
"/transformer_blocks.1/Add_output_0.out10_2": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 108544
},
"/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 111616
},
"/transformer_blocks.1/Add_2_output_0.out0_0_6_bfp.out1_3": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 113152
},
"/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_10": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
6144
],
"size_in_bytes": 6144,
"op_tensor_size": 6144,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 114688
},
"/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_20_bfp.out25_12": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 120832
},
"/transformer_blocks.1/Add_3_output_0.out10_3": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 122368
},
"/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 125440
},
"/transformer_blocks.1/Add_4_output_0.out10_4": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 126976
},
"/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 130048
},
"/transformer_blocks.1/Add_6_output_0.out0_0_7_bfp.out1_5": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 131584
},
"/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_11": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
6144
],
"size_in_bytes": 6144,
"op_tensor_size": 6144,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 133120
},
"/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_21_bfp.out25_13": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 139264
},
"/transformer_blocks.1/Add_7_output_0.out10_5": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 140800
},
"/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_8_bfp.out15_8": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 143872
},
"/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_6": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 145408
},
"/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_9_bfp.out15_9": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 146944
},
"/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_8": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 148480
},
"/transformer_blocks.2/attn/Concat_output_0.out22_2_bfp.out23_6": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
24,
1,
64
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"False",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 150016
},
"/transformer_blocks.2/attn/Concat_1_output_0.out22_2_bfp.out23_7": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
24,
1,
64
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"False",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 151552
},
"/transformer_blocks.2/attn/Concat_2_output_0.out22_2_bfp.out23_8": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
24,
1,
64
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"False",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 153088
},
"/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 154624
},
"/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 156160
},
"/transformer_blocks.2/Add_output_0.out10_6": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 157696
},
"/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 160768
},
"/transformer_blocks.2/Add_2_output_0.out0_0_10_bfp.out1_7": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 162304
},
"/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_15": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
6144
],
"size_in_bytes": 6144,
"op_tensor_size": 6144,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 163840
},
"/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_29_bfp.out25_17": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 169984
},
"/transformer_blocks.2/Add_3_output_0.out10_7": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 171520
},
"/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 174592
},
"/transformer_blocks.2/Add_4_output_0.out10_8": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 176128
},
"/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 179200
},
"/transformer_blocks.2/Add_6_output_0.out0_0_11_bfp.out1_9": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 180736
},
"/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_16": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
6144
],
"size_in_bytes": 6144,
"op_tensor_size": 6144,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 182272
},
"/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_30_bfp.out25_18": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 188416
},
"/transformer_blocks.2/Add_7_output_0.out10_9": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 189952
},
"/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_12_bfp.out15_12": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 193024
},
"/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_10": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 194560
},
"/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_13_bfp.out15_13": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 196096
},
"/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_12": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 197632
},
"/transformer_blocks.3/attn/Concat_output_0.out22_3_bfp.out23_9": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
24,
1,
64
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"False",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 199168
},
"/transformer_blocks.3/attn/Concat_1_output_0.out22_3_bfp.out23_10": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
24,
1,
64
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"False",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 200704
},
"/transformer_blocks.3/attn/Concat_2_output_0.out22_3_bfp.out23_11": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
24,
1,
64
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"False",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 202240
},
"/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 203776
},
"/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 205312
},
"/transformer_blocks.3/Add_output_0.out10_10": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 206848
},
"/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 209920
},
"/transformer_blocks.3/Add_2_output_0.out0_0_14_bfp.out1_11": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 211456
},
"/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_20": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
6144
],
"size_in_bytes": 6144,
"op_tensor_size": 6144,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 212992
},
"/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_38_bfp.out25_22": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 219136
},
"/transformer_blocks.3/Add_3_output_0.out10_11": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 220672
},
"/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 223744
},
"/transformer_blocks.3/Add_4_output_0.out10_12": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 225280
},
"/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 228352
},
"/transformer_blocks.3/Add_6_output_0.out0_0_15_bfp.out1_13": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 229888
},
"/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_21": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
6144
],
"size_in_bytes": 6144,
"op_tensor_size": 6144,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 231424
},
"/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_39_bfp.out25_23": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 237568
},
"/transformer_blocks.3/Add_7_output_0.out10_13": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 239104
},
"/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_16_bfp.out15_16": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 242176
},
"/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_14": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 243712
},
"/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_17_bfp.out15_17": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 245248
},
"/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_16": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 246784
},
"/transformer_blocks.4/attn/Concat_output_0.out22_4_bfp.out23_12": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
24,
1,
64
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"False",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 248320
},
"/transformer_blocks.4/attn/Concat_1_output_0.out22_4_bfp.out23_13": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
24,
1,
64
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"False",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 249856
},
"/transformer_blocks.4/attn/Concat_2_output_0.out22_4_bfp.out23_14": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
24,
1,
64
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"False",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 251392
},
"/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 252928
},
"/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 254464
},
"/transformer_blocks.4/Add_output_0.out10_14": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 256000
},
"/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 259072
},
"/transformer_blocks.4/Add_2_output_0.out0_0_18_bfp.out1_15": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 260608
},
"/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_25": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
6144
],
"size_in_bytes": 6144,
"op_tensor_size": 6144,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 262144
},
"/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_47_bfp.out25_27": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 268288
},
"/transformer_blocks.4/Add_3_output_0.out10_15": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 269824
},
"/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 272896
},
"/transformer_blocks.4/Add_4_output_0.out10_16": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 274432
},
"/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 277504
},
"/transformer_blocks.4/Add_6_output_0.out0_0_19_bfp.out1_17": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 279040
},
"/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_26": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
6144
],
"size_in_bytes": 6144,
"op_tensor_size": 6144,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 280576
},
"/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_48_bfp.out25_28": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 286720
},
"/transformer_blocks.4/Add_7_output_0.out10_17": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 288256
},
"/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_20_bfp.out15_20": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 291328
},
"/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_18": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 292864
},
"/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_21_bfp.out15_21": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 294400
},
"/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_20": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length",
"False"
],
"offset": 295936
},
"/transformer_blocks.5/attn/Concat_output_0.out22_5_bfp.out23_15": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
24,
1,
64
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"False",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 297472
},
"/transformer_blocks.5/attn/Concat_1_output_0.out22_5_bfp.out23_16": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
24,
1,
64
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"False",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 299008
},
"/transformer_blocks.5/attn/Concat_2_output_0.out22_5_bfp.out23_17": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
24,
1,
64
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"False",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 300544
},
"/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"max_length + floor(h/2)*floor(w/2)",
"False"
],
"offset": 302080
},
"/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 303616
},
"/transformer_blocks.5/Add_output_0.out10_18": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 305152
},
"/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 308224
},
"/transformer_blocks.5/Add_2_output_0.out0_0_22_bfp.out1_19": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 309760
},
"/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_30": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
6144
],
"size_in_bytes": 6144,
"op_tensor_size": 6144,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 311296
},
"/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_56_bfp.out25_31": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "uint8",
"shape": [
1,
1,
1536
],
"size_in_bytes": 1536,
"op_tensor_size": 1536,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 317440
},
"/transformer_blocks.5/Add_3_output_0.out10_19": {
"packed_buffer_label": "scratch",
"xrt_arg_id": 2,
"dtype": "bfloat16",
"shape": [
1,
1,
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"dynamic_shapes": [
"batch_size",
"floor(h/2)*floor(w/2)",
"False"
],
"offset": 318976
},
"pos_embed.proj.weight": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
135168
],
"size_in_bytes": 135168,
"op_tensor_size": 135168,
"offset": 0,
"file_name": "cache/pos_embedprojConv_0.const",
"file_size": 135168
},
"pos_embed_input.proj.weight": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
135168
],
"size_in_bytes": 135168,
"op_tensor_size": 135168,
"offset": 135168,
"file_name": "cache/pos_embedprojConv_1.const",
"file_size": 135168
},
"time_text_embed.timestep_embedder.linear_1.weight_5_1_2": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
448512
],
"size_in_bytes": 448512,
"op_tensor_size": 448512,
"offset": 270336,
"file_name": "cache/pos_embedprojConv_2.const",
"file_size": 448512
},
"/time_text_embed/timestep_embedder/act/Sigmoid.weights2_1": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
128
],
"size_in_bytes": 256,
"op_tensor_size": 256,
"offset": 718848,
"file_name": "cache/pos_embedprojConv_3.const",
"file_size": 256
},
"time_text_embed.timestep_embedder.linear_2.weight_5_1_3": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 719104,
"file_name": "cache/pos_embedprojConv_4.const",
"file_size": 2691072
},
"time_text_embed.text_embedder.linear_1.weight_5_1_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
3588096
],
"size_in_bytes": 3588096,
"op_tensor_size": 3588096,
"offset": 3410176,
"file_name": "cache/pos_embedprojConv_5.const",
"file_size": 3588096
},
"/time_text_embed/text_embedder/act_1/Sigmoid.weights2_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
128
],
"size_in_bytes": 256,
"op_tensor_size": 256,
"offset": 6998272,
"file_name": "cache/pos_embedprojConv_6.const",
"file_size": 256
},
"time_text_embed.text_embedder.linear_2.weight_5_1_1": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 6998528,
"file_name": "cache/pos_embedprojConv_7.const",
"file_size": 2691072
},
"/transformer_blocks.0/norm1/silu/Sigmoid.weights2_2": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
128
],
"size_in_bytes": 256,
"op_tensor_size": 256,
"offset": 9689600,
"file_name": "cache/pos_embedprojConv_8.const",
"file_size": 256
},
"encoder_hidden_states.out17_3_0_bfp.wts": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
64
],
"size_in_bytes": 128,
"op_tensor_size": 128,
"offset": 9689856,
"file_name": "cache/pos_embedprojConv_9.const",
"file_size": 128
},
"onnx::MatMul_1943": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
7274496
],
"size_in_bytes": 7274496,
"op_tensor_size": 7274496,
"offset": 9689984,
"file_name": "cache/pos_embedprojConv_10.const",
"file_size": 7274496
},
"/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_bfp.wts": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
64
],
"size_in_bytes": 128,
"op_tensor_size": 128,
"offset": 16964480,
"file_name": "cache/pos_embedprojConv_11.const",
"file_size": 128
},
"/transformer_blocks.0/norm1_context/norm/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 16964608,
"file_name": "cache/pos_embedprojConv_12.const",
"file_size": 3072
},
"/transformer_blocks.0/norm1_context/norm/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 16967680,
"file_name": "cache/pos_embedprojConv_13.const",
"file_size": 3072
},
"transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_1_transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 16970752,
"file_name": "cache/pos_embedprojConv_14.const",
"file_size": 5382144
},
"/transformer_blocks.0/norm1/norm/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 22352896,
"file_name": "cache/pos_embedprojConv_15.const",
"file_size": 3072
},
"/transformer_blocks.0/norm1/norm/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 22355968,
"file_name": "cache/pos_embedprojConv_16.const",
"file_size": 3072
},
"transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_4_transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_3": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 22359040,
"file_name": "cache/pos_embedprojConv_17.const",
"file_size": 5382144
},
"onnx::MatMul_1947_onnx::MatMul_1944": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 27741184,
"file_name": "cache/pos_embedprojConv_18.const",
"file_size": 5382144
},
"onnx::MatMul_1948_onnx::MatMul_1945": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 33123328,
"file_name": "cache/pos_embedprojConv_19.const",
"file_size": 5382144
},
"onnx::MatMul_1949_onnx::MatMul_1946": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 38505472,
"file_name": "cache/pos_embedprojConv_20.const",
"file_size": 5382144
},
"onnx::MatMul_1964": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 43887616,
"file_name": "cache/pos_embedprojConv_21.const",
"file_size": 2691072
},
"transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_4_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 46578688,
"file_name": "cache/pos_embedprojConv_22.const",
"file_size": 2691072
},
"/transformer_blocks.0/norm2_context/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 49269760,
"file_name": "cache/pos_embedprojConv_23.const",
"file_size": 3072
},
"/transformer_blocks.0/norm2_context/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 49272832,
"file_name": "cache/pos_embedprojConv_24.const",
"file_size": 3072
},
"transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_3_transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_2": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 49275904,
"file_name": "cache/pos_embedprojConv_25.const",
"file_size": 5382144
},
"onnx::MatMul_1963": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 54658048,
"file_name": "cache/pos_embedprojConv_26.const",
"file_size": 2691072
},
"onnx::MatMul_1967": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 57349120,
"file_name": "cache/pos_embedprojConv_27.const",
"file_size": 10764288
},
"onnx::MatMul_1968": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 68113408,
"file_name": "cache/pos_embedprojConv_28.const",
"file_size": 10764288
},
"transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_5_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 78877696,
"file_name": "cache/pos_embedprojConv_29.const",
"file_size": 2691072
},
"transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_2_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 81568768,
"file_name": "cache/pos_embedprojConv_30.const",
"file_size": 2691072
},
"/transformer_blocks.0/norm2/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 84259840,
"file_name": "cache/pos_embedprojConv_31.const",
"file_size": 3072
},
"/transformer_blocks.0/norm2/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 84262912,
"file_name": "cache/pos_embedprojConv_32.const",
"file_size": 3072
},
"transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_1_transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 84265984,
"file_name": "cache/pos_embedprojConv_33.const",
"file_size": 5382144
},
"onnx::MatMul_1965": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 89648128,
"file_name": "cache/pos_embedprojConv_34.const",
"file_size": 10764288
},
"onnx::MatMul_1966": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 100412416,
"file_name": "cache/pos_embedprojConv_35.const",
"file_size": 10764288
},
"transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_5_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 111176704,
"file_name": "cache/pos_embedprojConv_36.const",
"file_size": 2691072
},
"/transformer_blocks.1/norm1/norm/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 113867776,
"file_name": "cache/pos_embedprojConv_37.const",
"file_size": 3072
},
"/transformer_blocks.1/norm1/norm/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 113870848,
"file_name": "cache/pos_embedprojConv_38.const",
"file_size": 3072
},
"transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_0_transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_1": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 113873920,
"file_name": "cache/pos_embedprojConv_39.const",
"file_size": 5382144
},
"onnx::MatMul_2088": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 119256064,
"file_name": "cache/pos_embedprojConv_40.const",
"file_size": 2691072
},
"/transformer_blocks.1/norm1_context/norm/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 121947136,
"file_name": "cache/pos_embedprojConv_41.const",
"file_size": 3072
},
"/transformer_blocks.1/norm1_context/norm/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 121950208,
"file_name": "cache/pos_embedprojConv_42.const",
"file_size": 3072
},
"transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_0_transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_1": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 121953280,
"file_name": "cache/pos_embedprojConv_43.const",
"file_size": 5382144
},
"onnx::MatMul_1972_onnx::MatMul_1969": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 127335424,
"file_name": "cache/pos_embedprojConv_44.const",
"file_size": 5382144
},
"onnx::MatMul_1973_onnx::MatMul_1970": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 132717568,
"file_name": "cache/pos_embedprojConv_45.const",
"file_size": 5382144
},
"onnx::MatMul_1974_onnx::MatMul_1971": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 138099712,
"file_name": "cache/pos_embedprojConv_46.const",
"file_size": 5382144
},
"onnx::MatMul_1988": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 143481856,
"file_name": "cache/pos_embedprojConv_47.const",
"file_size": 2691072
},
"transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_2_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 146172928,
"file_name": "cache/pos_embedprojConv_48.const",
"file_size": 2691072
},
"/transformer_blocks.1/norm2/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 148864000,
"file_name": "cache/pos_embedprojConv_49.const",
"file_size": 3072
},
"/transformer_blocks.1/norm2/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 148867072,
"file_name": "cache/pos_embedprojConv_50.const",
"file_size": 3072
},
"transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_3_transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_4": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 148870144,
"file_name": "cache/pos_embedprojConv_51.const",
"file_size": 5382144
},
"onnx::MatMul_1990": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 154252288,
"file_name": "cache/pos_embedprojConv_52.const",
"file_size": 10764288
},
"onnx::MatMul_1991": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 165016576,
"file_name": "cache/pos_embedprojConv_53.const",
"file_size": 10764288
},
"transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_5_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 175780864,
"file_name": "cache/pos_embedprojConv_54.const",
"file_size": 2691072
},
"onnx::MatMul_2089": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 178471936,
"file_name": "cache/pos_embedprojConv_55.const",
"file_size": 2691072
},
"onnx::MatMul_1989": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 181163008,
"file_name": "cache/pos_embedprojConv_56.const",
"file_size": 2691072
},
"transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_2_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 183854080,
"file_name": "cache/pos_embedprojConv_57.const",
"file_size": 2691072
},
"/transformer_blocks.1/norm2_context/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 186545152,
"file_name": "cache/pos_embedprojConv_58.const",
"file_size": 3072
},
"/transformer_blocks.1/norm2_context/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 186548224,
"file_name": "cache/pos_embedprojConv_59.const",
"file_size": 3072
},
"transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_3_transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_4": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 186551296,
"file_name": "cache/pos_embedprojConv_60.const",
"file_size": 5382144
},
"onnx::MatMul_1992": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 191933440,
"file_name": "cache/pos_embedprojConv_61.const",
"file_size": 10764288
},
"onnx::MatMul_1993": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 202697728,
"file_name": "cache/pos_embedprojConv_62.const",
"file_size": 10764288
},
"transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_5_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 213462016,
"file_name": "cache/pos_embedprojConv_63.const",
"file_size": 2691072
},
"/transformer_blocks.2/norm1/norm/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 216153088,
"file_name": "cache/pos_embedprojConv_64.const",
"file_size": 3072
},
"/transformer_blocks.2/norm1/norm/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 216156160,
"file_name": "cache/pos_embedprojConv_65.const",
"file_size": 3072
},
"transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_0_transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_1": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 216159232,
"file_name": "cache/pos_embedprojConv_66.const",
"file_size": 5382144
},
"/transformer_blocks.2/norm1_context/norm/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 221541376,
"file_name": "cache/pos_embedprojConv_67.const",
"file_size": 3072
},
"/transformer_blocks.2/norm1_context/norm/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 221544448,
"file_name": "cache/pos_embedprojConv_68.const",
"file_size": 3072
},
"transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_0_transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_1": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 221547520,
"file_name": "cache/pos_embedprojConv_69.const",
"file_size": 5382144
},
"onnx::MatMul_1997_onnx::MatMul_1994": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 226929664,
"file_name": "cache/pos_embedprojConv_70.const",
"file_size": 5382144
},
"onnx::MatMul_1998_onnx::MatMul_1995": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 232311808,
"file_name": "cache/pos_embedprojConv_71.const",
"file_size": 5382144
},
"onnx::MatMul_1999_onnx::MatMul_1996": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 237693952,
"file_name": "cache/pos_embedprojConv_72.const",
"file_size": 5382144
},
"onnx::MatMul_2013": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 243076096,
"file_name": "cache/pos_embedprojConv_73.const",
"file_size": 2691072
},
"transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_2_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 245767168,
"file_name": "cache/pos_embedprojConv_74.const",
"file_size": 2691072
},
"/transformer_blocks.2/norm2/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 248458240,
"file_name": "cache/pos_embedprojConv_75.const",
"file_size": 3072
},
"/transformer_blocks.2/norm2/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 248461312,
"file_name": "cache/pos_embedprojConv_76.const",
"file_size": 3072
},
"transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_3_transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_4": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 248464384,
"file_name": "cache/pos_embedprojConv_77.const",
"file_size": 5382144
},
"onnx::MatMul_2015": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 253846528,
"file_name": "cache/pos_embedprojConv_78.const",
"file_size": 10764288
},
"onnx::MatMul_2016": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 264610816,
"file_name": "cache/pos_embedprojConv_79.const",
"file_size": 10764288
},
"transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_5_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 275375104,
"file_name": "cache/pos_embedprojConv_80.const",
"file_size": 2691072
},
"onnx::MatMul_2090": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 278066176,
"file_name": "cache/pos_embedprojConv_81.const",
"file_size": 2691072
},
"onnx::MatMul_2014": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 280757248,
"file_name": "cache/pos_embedprojConv_82.const",
"file_size": 2691072
},
"transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_2_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 283448320,
"file_name": "cache/pos_embedprojConv_83.const",
"file_size": 2691072
},
"/transformer_blocks.2/norm2_context/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 286139392,
"file_name": "cache/pos_embedprojConv_84.const",
"file_size": 3072
},
"/transformer_blocks.2/norm2_context/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 286142464,
"file_name": "cache/pos_embedprojConv_85.const",
"file_size": 3072
},
"transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_3_transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_4": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 286145536,
"file_name": "cache/pos_embedprojConv_86.const",
"file_size": 5382144
},
"onnx::MatMul_2017": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 291527680,
"file_name": "cache/pos_embedprojConv_87.const",
"file_size": 10764288
},
"onnx::MatMul_2018": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 302291968,
"file_name": "cache/pos_embedprojConv_88.const",
"file_size": 10764288
},
"transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_5_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 313056256,
"file_name": "cache/pos_embedprojConv_89.const",
"file_size": 2691072
},
"/transformer_blocks.3/norm1/norm/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 315747328,
"file_name": "cache/pos_embedprojConv_90.const",
"file_size": 3072
},
"/transformer_blocks.3/norm1/norm/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 315750400,
"file_name": "cache/pos_embedprojConv_91.const",
"file_size": 3072
},
"transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_0_transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_1": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 315753472,
"file_name": "cache/pos_embedprojConv_92.const",
"file_size": 5382144
},
"/transformer_blocks.3/norm1_context/norm/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 321135616,
"file_name": "cache/pos_embedprojConv_93.const",
"file_size": 3072
},
"/transformer_blocks.3/norm1_context/norm/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 321138688,
"file_name": "cache/pos_embedprojConv_94.const",
"file_size": 3072
},
"transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_0_transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_1": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 321141760,
"file_name": "cache/pos_embedprojConv_95.const",
"file_size": 5382144
},
"onnx::MatMul_2022_onnx::MatMul_2019": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 326523904,
"file_name": "cache/pos_embedprojConv_96.const",
"file_size": 5382144
},
"onnx::MatMul_2023_onnx::MatMul_2020": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 331906048,
"file_name": "cache/pos_embedprojConv_97.const",
"file_size": 5382144
},
"onnx::MatMul_2024_onnx::MatMul_2021": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 337288192,
"file_name": "cache/pos_embedprojConv_98.const",
"file_size": 5382144
},
"onnx::MatMul_2038": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 342670336,
"file_name": "cache/pos_embedprojConv_99.const",
"file_size": 2691072
},
"transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_2_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 345361408,
"file_name": "cache/pos_embedprojConv_100.const",
"file_size": 2691072
},
"/transformer_blocks.3/norm2/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 348052480,
"file_name": "cache/pos_embedprojConv_101.const",
"file_size": 3072
},
"/transformer_blocks.3/norm2/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 348055552,
"file_name": "cache/pos_embedprojConv_102.const",
"file_size": 3072
},
"transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_3_transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_4": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 348058624,
"file_name": "cache/pos_embedprojConv_103.const",
"file_size": 5382144
},
"onnx::MatMul_2040": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 353440768,
"file_name": "cache/pos_embedprojConv_104.const",
"file_size": 10764288
},
"onnx::MatMul_2041": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 364205056,
"file_name": "cache/pos_embedprojConv_105.const",
"file_size": 10764288
},
"transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_5_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 374969344,
"file_name": "cache/pos_embedprojConv_106.const",
"file_size": 2691072
},
"onnx::MatMul_2091": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 377660416,
"file_name": "cache/pos_embedprojConv_107.const",
"file_size": 2691072
},
"onnx::MatMul_2039": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 380351488,
"file_name": "cache/pos_embedprojConv_108.const",
"file_size": 2691072
},
"transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_2_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 383042560,
"file_name": "cache/pos_embedprojConv_109.const",
"file_size": 2691072
},
"/transformer_blocks.3/norm2_context/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 385733632,
"file_name": "cache/pos_embedprojConv_110.const",
"file_size": 3072
},
"/transformer_blocks.3/norm2_context/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 385736704,
"file_name": "cache/pos_embedprojConv_111.const",
"file_size": 3072
},
"transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_3_transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_4": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 385739776,
"file_name": "cache/pos_embedprojConv_112.const",
"file_size": 5382144
},
"onnx::MatMul_2042": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 391121920,
"file_name": "cache/pos_embedprojConv_113.const",
"file_size": 10764288
},
"onnx::MatMul_2043": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 401886208,
"file_name": "cache/pos_embedprojConv_114.const",
"file_size": 10764288
},
"transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_5_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 412650496,
"file_name": "cache/pos_embedprojConv_115.const",
"file_size": 2691072
},
"/transformer_blocks.4/norm1/norm/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 415341568,
"file_name": "cache/pos_embedprojConv_116.const",
"file_size": 3072
},
"/transformer_blocks.4/norm1/norm/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 415344640,
"file_name": "cache/pos_embedprojConv_117.const",
"file_size": 3072
},
"transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_0_transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_1": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 415347712,
"file_name": "cache/pos_embedprojConv_118.const",
"file_size": 5382144
},
"/transformer_blocks.4/norm1_context/norm/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 420729856,
"file_name": "cache/pos_embedprojConv_119.const",
"file_size": 3072
},
"/transformer_blocks.4/norm1_context/norm/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 420732928,
"file_name": "cache/pos_embedprojConv_120.const",
"file_size": 3072
},
"transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_0_transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_1": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 420736000,
"file_name": "cache/pos_embedprojConv_121.const",
"file_size": 5382144
},
"onnx::MatMul_2047_onnx::MatMul_2044": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 426118144,
"file_name": "cache/pos_embedprojConv_122.const",
"file_size": 5382144
},
"onnx::MatMul_2048_onnx::MatMul_2045": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 431500288,
"file_name": "cache/pos_embedprojConv_123.const",
"file_size": 5382144
},
"onnx::MatMul_2049_onnx::MatMul_2046": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 436882432,
"file_name": "cache/pos_embedprojConv_124.const",
"file_size": 5382144
},
"onnx::MatMul_2063": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 442264576,
"file_name": "cache/pos_embedprojConv_125.const",
"file_size": 2691072
},
"transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_2_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 444955648,
"file_name": "cache/pos_embedprojConv_126.const",
"file_size": 2691072
},
"/transformer_blocks.4/norm2/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 447646720,
"file_name": "cache/pos_embedprojConv_127.const",
"file_size": 3072
},
"/transformer_blocks.4/norm2/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 447649792,
"file_name": "cache/pos_embedprojConv_128.const",
"file_size": 3072
},
"transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_3_transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_4": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 447652864,
"file_name": "cache/pos_embedprojConv_129.const",
"file_size": 5382144
},
"onnx::MatMul_2065": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 453035008,
"file_name": "cache/pos_embedprojConv_130.const",
"file_size": 10764288
},
"onnx::MatMul_2066": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 463799296,
"file_name": "cache/pos_embedprojConv_131.const",
"file_size": 10764288
},
"transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_5_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 474563584,
"file_name": "cache/pos_embedprojConv_132.const",
"file_size": 2691072
},
"onnx::MatMul_2092": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 477254656,
"file_name": "cache/pos_embedprojConv_133.const",
"file_size": 2691072
},
"onnx::MatMul_2064": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 479945728,
"file_name": "cache/pos_embedprojConv_134.const",
"file_size": 2691072
},
"transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_2_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 482636800,
"file_name": "cache/pos_embedprojConv_135.const",
"file_size": 2691072
},
"/transformer_blocks.4/norm2_context/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 485327872,
"file_name": "cache/pos_embedprojConv_136.const",
"file_size": 3072
},
"/transformer_blocks.4/norm2_context/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 485330944,
"file_name": "cache/pos_embedprojConv_137.const",
"file_size": 3072
},
"transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_3_transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_4": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 485334016,
"file_name": "cache/pos_embedprojConv_138.const",
"file_size": 5382144
},
"onnx::MatMul_2067": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 490716160,
"file_name": "cache/pos_embedprojConv_139.const",
"file_size": 10764288
},
"onnx::MatMul_2068": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 501480448,
"file_name": "cache/pos_embedprojConv_140.const",
"file_size": 10764288
},
"transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_5_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 512244736,
"file_name": "cache/pos_embedprojConv_141.const",
"file_size": 2691072
},
"/transformer_blocks.5/norm1/norm/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 514935808,
"file_name": "cache/pos_embedprojConv_142.const",
"file_size": 3072
},
"/transformer_blocks.5/norm1/norm/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 514938880,
"file_name": "cache/pos_embedprojConv_143.const",
"file_size": 3072
},
"transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_0_transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_1": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 514941952,
"file_name": "cache/pos_embedprojConv_144.const",
"file_size": 5382144
},
"/transformer_blocks.5/norm1_context/norm/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 520324096,
"file_name": "cache/pos_embedprojConv_145.const",
"file_size": 3072
},
"/transformer_blocks.5/norm1_context/norm/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 520327168,
"file_name": "cache/pos_embedprojConv_146.const",
"file_size": 3072
},
"transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_0_transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_1": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 520330240,
"file_name": "cache/pos_embedprojConv_147.const",
"file_size": 5382144
},
"onnx::MatMul_2072_onnx::MatMul_2069": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 525712384,
"file_name": "cache/pos_embedprojConv_148.const",
"file_size": 5382144
},
"onnx::MatMul_2073_onnx::MatMul_2070": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 531094528,
"file_name": "cache/pos_embedprojConv_149.const",
"file_size": 5382144
},
"onnx::MatMul_2074_onnx::MatMul_2071": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 536476672,
"file_name": "cache/pos_embedprojConv_150.const",
"file_size": 5382144
},
"onnx::MatMul_2085": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 541858816,
"file_name": "cache/pos_embedprojConv_151.const",
"file_size": 2691072
},
"transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_2_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 544549888,
"file_name": "cache/pos_embedprojConv_152.const",
"file_size": 2691072
},
"/transformer_blocks.5/norm2/Constant_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 547240960,
"file_name": "cache/pos_embedprojConv_153.const",
"file_size": 3072
},
"/transformer_blocks.5/norm2/Constant_1_output_0": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfloat16",
"shape": [
1536
],
"size_in_bytes": 3072,
"op_tensor_size": 3072,
"offset": 547244032,
"file_name": "cache/pos_embedprojConv_154.const",
"file_size": 3072
},
"transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_3_transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_4": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
5382144
],
"size_in_bytes": 5382144,
"op_tensor_size": 5382144,
"offset": 547247104,
"file_name": "cache/pos_embedprojConv_155.const",
"file_size": 5382144
},
"onnx::MatMul_2086": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 552629248,
"file_name": "cache/pos_embedprojConv_156.const",
"file_size": 10764288
},
"onnx::MatMul_2087": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
10764288
],
"size_in_bytes": 10764288,
"op_tensor_size": 10764288,
"offset": 563393536,
"file_name": "cache/pos_embedprojConv_157.const",
"file_size": 10764288
},
"transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_5_gma": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "uint8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 574157824,
"file_name": "cache/pos_embedprojConv_158.const",
"file_size": 2691072
},
"onnx::MatMul_2093": {
"packed_buffer_label": "const",
"xrt_arg_id": 3,
"dtype": "bfp16ebs8",
"shape": [
2691072
],
"size_in_bytes": 2691072,
"op_tensor_size": 2691072,
"offset": 576848896,
"file_name": "cache/pos_embedprojConv_159.const",
"file_size": 2691072
}
},
"dynamic_shape_subgraph": true,
"dynamic_shape_list": [
{
"floor(h/2)": 32,
"max_length + floor(h/2)*floor(w/2)": 1184,
"h": 64,
"w": 64,
"floor(h/2)*floor(w/2)": 1024,
"batch_size": 2,
"max_length": 160,
"floor(w/2)": 32
},
{
"floor(h/2)": 32,
"max_length + floor(h/2)*floor(w/2)": 1696,
"h": 64,
"w": 96,
"floor(h/2)*floor(w/2)": 1536,
"batch_size": 2,
"max_length": 160,
"floor(w/2)": 48
},
{
"floor(h/2)": 48,
"max_length + floor(h/2)*floor(w/2)": 1696,
"h": 96,
"w": 64,
"floor(h/2)*floor(w/2)": 1536,
"batch_size": 2,
"max_length": 160,
"floor(w/2)": 32
},
{
"floor(h/2)": 36,
"max_length + floor(h/2)*floor(w/2)": 2464,
"h": 72,
"w": 128,
"floor(h/2)*floor(w/2)": 2304,
"batch_size": 2,
"max_length": 160,
"floor(w/2)": 64
},
{
"floor(h/2)": 64,
"max_length + floor(h/2)*floor(w/2)": 2464,
"h": 128,
"w": 72,
"floor(h/2)*floor(w/2)": 2304,
"batch_size": 2,
"max_length": 160,
"floor(w/2)": 36
},
{
"floor(h/2)": 48,
"max_length + floor(h/2)*floor(w/2)": 3232,
"h": 96,
"w": 128,
"floor(h/2)*floor(w/2)": 3072,
"batch_size": 2,
"max_length": 160,
"floor(w/2)": 64
},
{
"floor(h/2)": 64,
"max_length + floor(h/2)*floor(w/2)": 3232,
"h": 128,
"w": 96,
"floor(h/2)*floor(w/2)": 3072,
"batch_size": 2,
"max_length": 160,
"floor(w/2)": 48
},
{
"floor(h/2)": 64,
"max_length + floor(h/2)*floor(w/2)": 4256,
"h": 128,
"w": 128,
"floor(h/2)*floor(w/2)": 4096,
"batch_size": 2,
"max_length": 160,
"floor(w/2)": 64
}
],
"aux_info": {}
}