diff --git "a/vae_decoder/dd/cache/NhwcConv_0-post_quant_convConv_meta.json" "b/vae_decoder/dd/cache/NhwcConv_0-post_quant_convConv_meta.json" --- "a/vae_decoder/dd/cache/NhwcConv_0-post_quant_convConv_meta.json" +++ "b/vae_decoder/dd/cache/NhwcConv_0-post_quant_convConv_meta.json" @@ -102,8 +102,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -207,8 +206,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -220,10 +218,10 @@ "NhwcConv_1_out-/decoder/conv_in/Conv_output_0.out0_1_1" ], "const_args": [ - "GroupNorm_0_wts_4_1_0" + "GroupNorm_0_wts_4_0_0" ], "out_args": [ - "GroupNorm_0.out9_0.out4_1_0" + "GroupNorm_0.out12_0.out4_0_0" ], "attrs": { "epsilon": { @@ -299,61 +297,10 @@ "512" ] }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_0", - "type": "SDSilu", - "in_args": [ - "GroupNorm_0.out9_0.out4_1_0" - ], - "const_args": [ - "Sigmoid_0.weights6_0" - ], - "out_args": [ - "GroupNorm_0_sigmoid_out.9_0.out6_0" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "64", - "64", - "512" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "64", - "64", - "512" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { + "nonlinear": { "type": "str", "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + "Silu" ] }, "ctrl_packet": { @@ -368,7 +315,7 @@ "name": "NhwcConv_2-/decoder/mid_block/resnets.0/conv1/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_0_sigmoid_out.9_0.out6_0" + "GroupNorm_0.out12_0.out4_0_0" ], "const_args": [ "NhwcConv_2_weight_NHWC" @@ -463,8 +410,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -476,10 +422,10 @@ "NhwcConv_2_out-/decoder/mid_block/resnets.0/conv1/Conv_output_0.out0_1_2" ], "const_args": [ - "GroupNorm_1_wts_4_1_1" + "GroupNorm_1_wts_4_0_1" ], "out_args": [ - "GroupNorm_1.out9_1.out4_1_1" + "GroupNorm_1.out12_1.out4_0_1" ], "attrs": { "epsilon": { @@ -555,61 +501,10 @@ "512" ] }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_1", - "type": "SDSilu", - "in_args": [ - "GroupNorm_1.out9_1.out4_1_1" - ], - "const_args": [ - "Sigmoid_1.weights6_1" - ], - "out_args": [ - "GroupNorm_1_sigmoid_out.9_1.out6_1" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "64", - "64", - "512" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "64", - "64", - "512" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { + "nonlinear": { "type": "str", "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + "Silu" ] }, "ctrl_packet": { @@ -624,7 +519,7 @@ "name": "NhwcConv_3-/decoder/mid_block/resnets.0/conv2/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_1_sigmoid_out.9_1.out6_1" + "GroupNorm_1.out12_1.out4_0_1" ], "const_args": [ "NhwcConv_3_weight_NHWC" @@ -719,8 +614,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -764,6 +658,19 @@ "512" ] }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16", + "bfloat16" + ] + }, + "out_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, "is_bias_add": { "type": "int", "value": [ @@ -785,10 +692,10 @@ "/decoder/mid_block/resnets.0/Add.out_2_1_0" ], "const_args": [ - "GroupNorm_2_wts_4_0_0" + "GroupNorm_2_wts_4_1_0" ], "out_args": [ - "/decoder/mid_block/attentions.0/group_norm/Add_output_0_4d_NHWC.out4_0_0" + "/decoder/mid_block/attentions.0/group_norm/Add_output_0_4d_NHWC.out4_1_0" ], "attrs": { "epsilon": { @@ -873,16 +780,16 @@ } }, { - "name": "/decoder/mid_block/attentions.0/to_k/MatMul", + "name": "/decoder/mid_block/attentions.0/to_k/MatMul/MatMulAddFusion", "type": "SDGemm", "in_args": [ - "/decoder/mid_block/attentions.0/group_norm/Add_output_0_4d_NHWC.out4_0_0" + "/decoder/mid_block/attentions.0/group_norm/Add_output_0_4d_NHWC.out4_1_0" ], "const_args": [ - "onnx::MatMul_918" + "onnx::MatMul_918_2_0_0" ], "out_args": [ - "/decoder/mid_block/attentions.0/to_k/Add_output_0.out1_3_0" + "/decoder/mid_block/attentions.0/Reshape_2_output_0_3d.out1_3_0" ], "attrs": { "input_shape": { @@ -908,14 +815,6 @@ "512" ] }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16", - "bfp16ebs8", - "bfloat16" - ] - }, "out_dtypes": { "type": "str", "value": [ @@ -933,20 +832,28 @@ "value": [ "1" ] + }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16", + "bfp16ebs8", + "bfloat16" + ] } } }, { - "name": "/decoder/mid_block/attentions.0/to_q/MatMul", + "name": "/decoder/mid_block/attentions.0/to_q/MatMul/MatMulAddFusion", "type": "SDGemm", "in_args": [ - "/decoder/mid_block/attentions.0/group_norm/Add_output_0_4d_NHWC.out4_0_0" + "/decoder/mid_block/attentions.0/group_norm/Add_output_0_4d_NHWC.out4_1_0" ], "const_args": [ - "onnx::MatMul_917" + "onnx::MatMul_917_2_0_1" ], "out_args": [ - "/decoder/mid_block/attentions.0/to_q/Add_output_0.out1_3_1" + "/decoder/mid_block/attentions.0/Reshape_1_output_0_3d.out1_3_1" ], "attrs": { "input_shape": { @@ -972,14 +879,6 @@ "512" ] }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16", - "bfp16ebs8", - "bfloat16" - ] - }, "out_dtypes": { "type": "str", "value": [ @@ -997,20 +896,28 @@ "value": [ "1" ] + }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16", + "bfp16ebs8", + "bfloat16" + ] } } }, { - "name": "/decoder/mid_block/attentions.0/to_v/MatMul", + "name": "/decoder/mid_block/attentions.0/to_v/MatMul/MatMulAddFusion", "type": "SDGemm", "in_args": [ - "/decoder/mid_block/attentions.0/group_norm/Add_output_0_4d_NHWC.out4_0_0" + "/decoder/mid_block/attentions.0/group_norm/Add_output_0_4d_NHWC.out4_1_0" ], "const_args": [ - "onnx::MatMul_919" + "onnx::MatMul_919_2_0_2" ], "out_args": [ - "/decoder/mid_block/attentions.0/to_v/Add_output_0.out1_3_2" + "/decoder/mid_block/attentions.0/Reshape_3_output_0_3d.out1_3_2" ], "attrs": { "input_shape": { @@ -1036,14 +943,6 @@ "512" ] }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16", - "bfp16ebs8", - "bfloat16" - ] - }, "out_dtypes": { "type": "str", "value": [ @@ -1061,22 +960,30 @@ "value": [ "1" ] + }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16", + "bfp16ebs8", + "bfloat16" + ] } } }, { - "name": "/decoder/mid_block/attentions.0/MatMulmha_1_0", - "type": "SDMHA", + "name": "/decoder/mid_block/attentions.0/MatMul/MatMulScaleFusion/_FusedActivationmha_3_1_0", + "type": "SDMHA_VAE", "in_args": [ - "/decoder/mid_block/attentions.0/to_q/Add_output_0.out1_3_1", - "/decoder/mid_block/attentions.0/to_k/Add_output_0.out1_3_0", - "/decoder/mid_block/attentions.0/to_v/Add_output_0.out1_3_2" + "/decoder/mid_block/attentions.0/Reshape_1_output_0_3d.out1_3_1", + "/decoder/mid_block/attentions.0/Reshape_2_output_0_3d.out1_3_0", + "/decoder/mid_block/attentions.0/Reshape_3_output_0_3d.out1_3_2" ], "const_args": [ - "/decoder/mid_block/attentions.0/MatMulmha_1_0_mask.10_0" + "/decoder/mid_block/attentions.0/MatMul/MatMulScaleFusion/_FusedActivationmha_3_1_0_mask.10_0" ], "out_args": [ - "/decoder/mid_block/attentions.0/Reshape_4_output_0.out10_0" + "/decoder/mid_block/attentions.0/Transpose_5_output_0_3d.out10_0" ], "attrs": { "num_heads": { @@ -1103,7 +1010,6 @@ "in_dtypes": { "type": "str", "value": [ - "bfloat16", "bfloat16", "bfloat16" ] @@ -1129,16 +1035,16 @@ } }, { - "name": "/decoder/mid_block/attentions.0/to_out.0/MatMul", + "name": "/decoder/mid_block/attentions.0/to_out.0/MatMul/MatMulAddFusion", "type": "SDGemm", "in_args": [ - "/decoder/mid_block/attentions.0/Reshape_4_output_0.out10_0" + "/decoder/mid_block/attentions.0/Transpose_5_output_0_3d.out10_0" ], "const_args": [ - "onnx::MatMul_927" + "onnx::MatMul_927_2_0_3" ], "out_args": [ - "/decoder/mid_block/attentions.0/Reshape_5_output_0.nhwc3_0.out1_0_0" + "/decoder/mid_block/attentions.0/Reshape_5_output_0.nhwc6_0.out1_0_0" ], "attrs": { "input_shape": { @@ -1164,14 +1070,6 @@ "512" ] }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16", - "bfp16ebs8", - "bfloat16" - ] - }, "out_dtypes": { "type": "str", "value": [ @@ -1189,6 +1087,14 @@ "value": [ "1" ] + }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16", + "bfp16ebs8", + "bfloat16" + ] } } }, @@ -1196,7 +1102,7 @@ "name": "/decoder/mid_block/attentions.0/Add", "type": "SDAdd", "in_args": [ - "/decoder/mid_block/attentions.0/Reshape_5_output_0.nhwc3_0.out1_0_0", + "/decoder/mid_block/attentions.0/Reshape_5_output_0.nhwc6_0.out1_0_0", "/decoder/mid_block/resnets.0/Add.out_2_1_0" ], "const_args": [], @@ -1231,6 +1137,19 @@ "512" ] }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16", + "bfloat16" + ] + }, + "out_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, "is_bias_add": { "type": "int", "value": [ @@ -1252,10 +1171,10 @@ "/decoder/mid_block/attentions.0/Add_output_0_NHWC.out_2_1_1" ], "const_args": [ - "GroupNorm_3_wts_4_1_2" + "GroupNorm_3_wts_4_0_2" ], "out_args": [ - "GroupNorm_3.out9_3.out4_1_2" + "GroupNorm_3.out12_3.out4_0_2" ], "attrs": { "epsilon": { @@ -1331,61 +1250,10 @@ "512" ] }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_2", - "type": "SDSilu", - "in_args": [ - "GroupNorm_3.out9_3.out4_1_2" - ], - "const_args": [ - "Sigmoid_2.weights6_2" - ], - "out_args": [ - "GroupNorm_3_sigmoid_out.9_3.out6_2" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "64", - "64", - "512" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "64", - "64", - "512" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { + "nonlinear": { "type": "str", "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + "Silu" ] }, "ctrl_packet": { @@ -1400,7 +1268,7 @@ "name": "NhwcConv_4-/decoder/mid_block/resnets.1/conv1/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_3_sigmoid_out.9_3.out6_2" + "GroupNorm_3.out12_3.out4_0_2" ], "const_args": [ "NhwcConv_4_weight_NHWC" @@ -1495,8 +1363,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -1508,10 +1375,10 @@ "NhwcConv_4_out-/decoder/mid_block/resnets.1/conv1/Conv_output_0.out0_1_4" ], "const_args": [ - "GroupNorm_4_wts_4_1_3" + "GroupNorm_4_wts_4_0_3" ], "out_args": [ - "GroupNorm_4.out9_4.out4_1_3" + "GroupNorm_4.out12_4.out4_0_3" ], "attrs": { "epsilon": { @@ -1587,6 +1454,12 @@ "512" ] }, + "nonlinear": { + "type": "str", + "value": [ + "Silu" + ] + }, "ctrl_packet": { "type": "int", "value": [ @@ -1596,82 +1469,25 @@ } }, { - "name": "Sigmoid_3", - "type": "SDSilu", + "name": "NhwcConv_5-/decoder/mid_block/resnets.1/conv2/Conv", + "type": "SDConv", "in_args": [ - "GroupNorm_4.out9_4.out4_1_3" + "GroupNorm_4.out12_4.out4_0_3" ], "const_args": [ - "Sigmoid_3.weights6_3" + "NhwcConv_5_weight_NHWC" ], "out_args": [ - "GroupNorm_4_sigmoid_out.9_4.out6_3" + "NhwcConv_5_out-/decoder/mid_block/resnets.1/conv2/Conv_output_0.out0_1_5" ], "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "64", - "64", - "512" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "64", - "64", - "512" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" - ] - }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "NhwcConv_5-/decoder/mid_block/resnets.1/conv2/Conv", - "type": "SDConv", - "in_args": [ - "GroupNorm_4_sigmoid_out.9_4.out6_3" - ], - "const_args": [ - "NhwcConv_5_weight_NHWC" - ], - "out_args": [ - "NhwcConv_5_out-/decoder/mid_block/resnets.1/conv2/Conv_output_0.out0_1_5" - ], - "attrs": { - "auto_pad": { - "type": "str", - "value": [ - "NOTSET" - ] - }, - "dilations": { + "auto_pad": { + "type": "str", + "value": [ + "NOTSET" + ] + }, + "dilations": { "type": "int", "value": [ "1", @@ -1751,8 +1567,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -1796,6 +1611,19 @@ "512" ] }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16", + "bfloat16" + ] + }, + "out_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, "is_bias_add": { "type": "int", "value": [ @@ -1817,10 +1645,10 @@ "/decoder/mid_block/resnets.1/Add_output_0_NHWC.out_2_1_2" ], "const_args": [ - "GroupNorm_5_wts_4_1_4" + "GroupNorm_5_wts_4_0_4" ], "out_args": [ - "GroupNorm_5.out9_5.out4_1_4" + "GroupNorm_5.out12_5.out4_0_4" ], "attrs": { "epsilon": { @@ -1896,61 +1724,10 @@ "512" ] }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_4", - "type": "SDSilu", - "in_args": [ - "GroupNorm_5.out9_5.out4_1_4" - ], - "const_args": [ - "Sigmoid_4.weights6_4" - ], - "out_args": [ - "GroupNorm_5_sigmoid_out.9_5.out6_4" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "64", - "64", - "512" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "64", - "64", - "512" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { + "nonlinear": { "type": "str", "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + "Silu" ] }, "ctrl_packet": { @@ -1965,7 +1742,7 @@ "name": "NhwcConv_6-/decoder/up_blocks.0/resnets.0/conv1/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_5_sigmoid_out.9_5.out6_4" + "GroupNorm_5.out12_5.out4_0_4" ], "const_args": [ "NhwcConv_6_weight_NHWC" @@ -2060,8 +1837,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -2073,10 +1849,10 @@ "NhwcConv_6_out-/decoder/up_blocks.0/resnets.0/conv1/Conv_output_0.out0_1_6" ], "const_args": [ - "GroupNorm_6_wts_4_1_5" + "GroupNorm_6_wts_4_0_5" ], "out_args": [ - "GroupNorm_6.out9_6.out4_1_5" + "GroupNorm_6.out12_6.out4_0_5" ], "attrs": { "epsilon": { @@ -2152,61 +1928,10 @@ "512" ] }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_5", - "type": "SDSilu", - "in_args": [ - "GroupNorm_6.out9_6.out4_1_5" - ], - "const_args": [ - "Sigmoid_5.weights6_5" - ], - "out_args": [ - "GroupNorm_6_sigmoid_out.9_6.out6_5" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "64", - "64", - "512" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "64", - "64", - "512" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { + "nonlinear": { "type": "str", "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + "Silu" ] }, "ctrl_packet": { @@ -2221,7 +1946,7 @@ "name": "NhwcConv_7-/decoder/up_blocks.0/resnets.0/conv2/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_6_sigmoid_out.9_6.out6_5" + "GroupNorm_6.out12_6.out4_0_5" ], "const_args": [ "NhwcConv_7_weight_NHWC" @@ -2316,8 +2041,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -2361,6 +2085,19 @@ "512" ] }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16", + "bfloat16" + ] + }, + "out_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, "is_bias_add": { "type": "int", "value": [ @@ -2382,10 +2119,10 @@ "/decoder/up_blocks.0/resnets.0/Add_output_0_NHWC.out_2_1_3" ], "const_args": [ - "GroupNorm_7_wts_4_1_6" + "GroupNorm_7_wts_4_0_6" ], "out_args": [ - "GroupNorm_7.out9_7.out4_1_6" + "GroupNorm_7.out12_7.out4_0_6" ], "attrs": { "epsilon": { @@ -2461,61 +2198,10 @@ "512" ] }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_6", - "type": "SDSilu", - "in_args": [ - "GroupNorm_7.out9_7.out4_1_6" - ], - "const_args": [ - "Sigmoid_6.weights6_6" - ], - "out_args": [ - "GroupNorm_7_sigmoid_out.9_7.out6_6" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "64", - "64", - "512" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "64", - "64", - "512" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { + "nonlinear": { "type": "str", "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + "Silu" ] }, "ctrl_packet": { @@ -2530,7 +2216,7 @@ "name": "NhwcConv_8-/decoder/up_blocks.0/resnets.1/conv1/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_7_sigmoid_out.9_7.out6_6" + "GroupNorm_7.out12_7.out4_0_6" ], "const_args": [ "NhwcConv_8_weight_NHWC" @@ -2625,8 +2311,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -2638,10 +2323,10 @@ "NhwcConv_8_out-/decoder/up_blocks.0/resnets.1/conv1/Conv_output_0.out0_1_8" ], "const_args": [ - "GroupNorm_8_wts_4_1_7" + "GroupNorm_8_wts_4_0_7" ], "out_args": [ - "GroupNorm_8.out9_8.out4_1_7" + "GroupNorm_8.out12_8.out4_0_7" ], "attrs": { "epsilon": { @@ -2717,6 +2402,12 @@ "512" ] }, + "nonlinear": { + "type": "str", + "value": [ + "Silu" + ] + }, "ctrl_packet": { "type": "int", "value": [ @@ -2726,82 +2417,25 @@ } }, { - "name": "Sigmoid_7", - "type": "SDSilu", + "name": "NhwcConv_9-/decoder/up_blocks.0/resnets.1/conv2/Conv", + "type": "SDConv", "in_args": [ - "GroupNorm_8.out9_8.out4_1_7" + "GroupNorm_8.out12_8.out4_0_7" ], "const_args": [ - "Sigmoid_7.weights6_7" + "NhwcConv_9_weight_NHWC" ], "out_args": [ - "GroupNorm_8_sigmoid_out.9_8.out6_7" + "NhwcConv_9_out-/decoder/up_blocks.0/resnets.1/conv2/Conv_output_0.out0_1_9" ], "attrs": { - "input_shape": { - "type": "int", + "auto_pad": { + "type": "str", "value": [ - "1", - "64", - "64", - "512" + "NOTSET" ] }, - "output_shape": { - "type": "int", - "value": [ - "1", - "64", - "64", - "512" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" - ] - }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "NhwcConv_9-/decoder/up_blocks.0/resnets.1/conv2/Conv", - "type": "SDConv", - "in_args": [ - "GroupNorm_8_sigmoid_out.9_8.out6_7" - ], - "const_args": [ - "NhwcConv_9_weight_NHWC" - ], - "out_args": [ - "NhwcConv_9_out-/decoder/up_blocks.0/resnets.1/conv2/Conv_output_0.out0_1_9" - ], - "attrs": { - "auto_pad": { - "type": "str", - "value": [ - "NOTSET" - ] - }, - "dilations": { + "dilations": { "type": "int", "value": [ "1", @@ -2881,8 +2515,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -2926,6 +2559,19 @@ "512" ] }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16", + "bfloat16" + ] + }, + "out_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, "is_bias_add": { "type": "int", "value": [ @@ -2947,10 +2593,10 @@ "/decoder/up_blocks.0/resnets.1/Add_output_0_NHWC.out_2_1_4" ], "const_args": [ - "GroupNorm_9_wts_4_1_8" + "GroupNorm_9_wts_4_0_8" ], "out_args": [ - "GroupNorm_9.out9_9.out4_1_8" + "GroupNorm_9.out12_9.out4_0_8" ], "attrs": { "epsilon": { @@ -3026,61 +2672,10 @@ "512" ] }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_8", - "type": "SDSilu", - "in_args": [ - "GroupNorm_9.out9_9.out4_1_8" - ], - "const_args": [ - "Sigmoid_8.weights6_8" - ], - "out_args": [ - "GroupNorm_9_sigmoid_out.9_9.out6_8" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "64", - "64", - "512" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "64", - "64", - "512" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { + "nonlinear": { "type": "str", "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + "Silu" ] }, "ctrl_packet": { @@ -3095,7 +2690,7 @@ "name": "NhwcConv_10-/decoder/up_blocks.0/resnets.2/conv1/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_9_sigmoid_out.9_9.out6_8" + "GroupNorm_9.out12_9.out4_0_8" ], "const_args": [ "NhwcConv_10_weight_NHWC" @@ -3190,8 +2785,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -3203,10 +2797,10 @@ "NhwcConv_10_out-/decoder/up_blocks.0/resnets.2/conv1/Conv_output_0.out0_1_10" ], "const_args": [ - "GroupNorm_10_wts_4_1_9" + "GroupNorm_10_wts_4_0_9" ], "out_args": [ - "GroupNorm_10.out9_10.out4_1_9" + "GroupNorm_10.out12_10.out4_0_9" ], "attrs": { "epsilon": { @@ -3282,61 +2876,10 @@ "512" ] }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_9", - "type": "SDSilu", - "in_args": [ - "GroupNorm_10.out9_10.out4_1_9" - ], - "const_args": [ - "Sigmoid_9.weights6_9" - ], - "out_args": [ - "GroupNorm_10_sigmoid_out.9_10.out6_9" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "64", - "64", - "512" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "64", - "64", - "512" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { + "nonlinear": { "type": "str", "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + "Silu" ] }, "ctrl_packet": { @@ -3351,7 +2894,7 @@ "name": "NhwcConv_11-/decoder/up_blocks.0/resnets.2/conv2/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_10_sigmoid_out.9_10.out6_9" + "GroupNorm_10.out12_10.out4_0_9" ], "const_args": [ "NhwcConv_11_weight_NHWC" @@ -3446,8 +2989,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -3461,7 +3003,7 @@ ], "const_args": [], "out_args": [ - "/decoder/up_blocks.0/resnets.2/Add_output_0.nhwc2_0.out_2_1_5" + "/decoder/up_blocks.0/resnets.2/Add_output_0.nhwc5_0.out_2_1_5" ], "attrs": { "a_shape": { @@ -3491,6 +3033,19 @@ "512" ] }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16", + "bfloat16" + ] + }, + "out_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, "is_bias_add": { "type": "int", "value": [ @@ -3509,13 +3064,13 @@ "name": "/decoder/up_blocks.0/upsamplers.0/Resize", "type": "SDResize", "in_args": [ - "/decoder/up_blocks.0/resnets.2/Add_output_0.nhwc2_0.out_2_1_5" + "/decoder/up_blocks.0/resnets.2/Add_output_0.nhwc5_0.out_2_1_5" ], "const_args": [ "/decoder/up_blocks.0/upsamplers.0/Resize.weights7_0" ], "out_args": [ - "/decoder/up_blocks.0/upsamplers.0/Resize_output_0.nhwc2_0.out_7_0" + "/decoder/up_blocks.0/upsamplers.0/Resize_output_0.nhwc5_0.out_7_0" ], "attrs": { "a_shape": { @@ -3536,6 +3091,18 @@ "512" ] }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, + "out_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, "ctrl_packet": { "type": "int", "value": [ @@ -3548,7 +3115,7 @@ "name": "NhwcConv_12-/decoder/up_blocks.0/upsamplers.0/conv/Conv", "type": "SDConv", "in_args": [ - "/decoder/up_blocks.0/upsamplers.0/Resize_output_0.nhwc2_0.out_7_0" + "/decoder/up_blocks.0/upsamplers.0/Resize_output_0.nhwc5_0.out_7_0" ], "const_args": [ "NhwcConv_12_weight_NHWC" @@ -3643,8 +3210,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -3656,10 +3222,10 @@ "NhwcConv_12_out-/decoder/up_blocks.0/upsamplers.0/conv/Conv_output_0.out0_1_12" ], "const_args": [ - "GroupNorm_11_wts_4_1_10" + "GroupNorm_11_wts_4_0_10" ], "out_args": [ - "GroupNorm_11.out9_11.out4_1_10" + "GroupNorm_11.out12_11.out4_0_10" ], "attrs": { "epsilon": { @@ -3735,61 +3301,10 @@ "512" ] }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_10", - "type": "SDSilu", - "in_args": [ - "GroupNorm_11.out9_11.out4_1_10" - ], - "const_args": [ - "Sigmoid_10.weights6_10" - ], - "out_args": [ - "GroupNorm_11_sigmoid_out.9_11.out6_10" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "128", - "128", - "512" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "128", - "128", - "512" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { + "nonlinear": { "type": "str", "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + "Silu" ] }, "ctrl_packet": { @@ -3804,7 +3319,7 @@ "name": "NhwcConv_13-/decoder/up_blocks.1/resnets.0/conv1/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_11_sigmoid_out.9_11.out6_10" + "GroupNorm_11.out12_11.out4_0_10" ], "const_args": [ "NhwcConv_13_weight_NHWC" @@ -3899,8 +3414,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -3912,10 +3426,10 @@ "NhwcConv_13_out-/decoder/up_blocks.1/resnets.0/conv1/Conv_output_0.out0_1_13" ], "const_args": [ - "GroupNorm_12_wts_4_1_11" + "GroupNorm_12_wts_4_0_11" ], "out_args": [ - "GroupNorm_12.out9_12.out4_1_11" + "GroupNorm_12.out12_12.out4_0_11" ], "attrs": { "epsilon": { @@ -3991,6 +3505,12 @@ "512" ] }, + "nonlinear": { + "type": "str", + "value": [ + "Silu" + ] + }, "ctrl_packet": { "type": "int", "value": [ @@ -4000,79 +3520,22 @@ } }, { - "name": "Sigmoid_11", - "type": "SDSilu", + "name": "NhwcConv_14-/decoder/up_blocks.1/resnets.0/conv2/Conv", + "type": "SDConv", "in_args": [ - "GroupNorm_12.out9_12.out4_1_11" + "GroupNorm_12.out12_12.out4_0_11" ], "const_args": [ - "Sigmoid_11.weights6_11" + "NhwcConv_14_weight_NHWC" ], "out_args": [ - "GroupNorm_12_sigmoid_out.9_12.out6_11" + "NhwcConv_14_out-/decoder/up_blocks.1/resnets.0/conv2/Conv_output_0.out0_1_14" ], "attrs": { - "input_shape": { - "type": "int", + "auto_pad": { + "type": "str", "value": [ - "1", - "128", - "128", - "512" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "128", - "128", - "512" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" - ] - }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "NhwcConv_14-/decoder/up_blocks.1/resnets.0/conv2/Conv", - "type": "SDConv", - "in_args": [ - "GroupNorm_12_sigmoid_out.9_12.out6_11" - ], - "const_args": [ - "NhwcConv_14_weight_NHWC" - ], - "out_args": [ - "NhwcConv_14_out-/decoder/up_blocks.1/resnets.0/conv2/Conv_output_0.out0_1_14" - ], - "attrs": { - "auto_pad": { - "type": "str", - "value": [ - "NOTSET" + "NOTSET" ] }, "dilations": { @@ -4155,8 +3618,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -4200,6 +3662,19 @@ "512" ] }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16", + "bfloat16" + ] + }, + "out_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, "is_bias_add": { "type": "int", "value": [ @@ -4221,10 +3696,10 @@ "/decoder/up_blocks.1/resnets.0/Add.out_2_1_6" ], "const_args": [ - "GroupNorm_13_wts_4_1_12" + "GroupNorm_13_wts_4_0_12" ], "out_args": [ - "GroupNorm_13.out9_13.out4_1_12" + "GroupNorm_13.out12_13.out4_0_12" ], "attrs": { "epsilon": { @@ -4300,61 +3775,10 @@ "512" ] }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_12", - "type": "SDSilu", - "in_args": [ - "GroupNorm_13.out9_13.out4_1_12" - ], - "const_args": [ - "Sigmoid_12.weights6_12" - ], - "out_args": [ - "GroupNorm_13_sigmoid_out.9_13.out6_12" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "128", - "128", - "512" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "128", - "128", - "512" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { + "nonlinear": { "type": "str", "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + "Silu" ] }, "ctrl_packet": { @@ -4369,7 +3793,7 @@ "name": "NhwcConv_15-/decoder/up_blocks.1/resnets.1/conv1/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_13_sigmoid_out.9_13.out6_12" + "GroupNorm_13.out12_13.out4_0_12" ], "const_args": [ "NhwcConv_15_weight_NHWC" @@ -4464,8 +3888,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -4477,10 +3900,10 @@ "NhwcConv_15_out-/decoder/up_blocks.1/resnets.1/conv1/Conv_output_0.out0_1_15" ], "const_args": [ - "GroupNorm_14_wts_4_1_13" + "GroupNorm_14_wts_4_0_13" ], "out_args": [ - "GroupNorm_14.out9_14.out4_1_13" + "GroupNorm_14.out12_14.out4_0_13" ], "attrs": { "epsilon": { @@ -4556,61 +3979,10 @@ "512" ] }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_13", - "type": "SDSilu", - "in_args": [ - "GroupNorm_14.out9_14.out4_1_13" - ], - "const_args": [ - "Sigmoid_13.weights6_13" - ], - "out_args": [ - "GroupNorm_14_sigmoid_out.9_14.out6_13" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "128", - "128", - "512" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "128", - "128", - "512" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { + "nonlinear": { "type": "str", "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + "Silu" ] }, "ctrl_packet": { @@ -4625,7 +3997,7 @@ "name": "NhwcConv_16-/decoder/up_blocks.1/resnets.1/conv2/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_14_sigmoid_out.9_14.out6_13" + "GroupNorm_14.out12_14.out4_0_13" ], "const_args": [ "NhwcConv_16_weight_NHWC" @@ -4720,8 +4092,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -4765,6 +4136,19 @@ "512" ] }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16", + "bfloat16" + ] + }, + "out_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, "is_bias_add": { "type": "int", "value": [ @@ -4786,10 +4170,10 @@ "/decoder/up_blocks.1/resnets.1/Add.out_2_1_7" ], "const_args": [ - "GroupNorm_15_wts_4_1_14" + "GroupNorm_15_wts_4_0_14" ], "out_args": [ - "GroupNorm_15.out9_15.out4_1_14" + "GroupNorm_15.out12_15.out4_0_14" ], "attrs": { "epsilon": { @@ -4865,61 +4249,10 @@ "512" ] }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_14", - "type": "SDSilu", - "in_args": [ - "GroupNorm_15.out9_15.out4_1_14" - ], - "const_args": [ - "Sigmoid_14.weights6_14" - ], - "out_args": [ - "GroupNorm_15_sigmoid_out.9_15.out6_14" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "128", - "128", - "512" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "128", - "128", - "512" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { + "nonlinear": { "type": "str", "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + "Silu" ] }, "ctrl_packet": { @@ -4934,7 +4267,7 @@ "name": "NhwcConv_17-/decoder/up_blocks.1/resnets.2/conv1/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_15_sigmoid_out.9_15.out6_14" + "GroupNorm_15.out12_15.out4_0_14" ], "const_args": [ "NhwcConv_17_weight_NHWC" @@ -5029,8 +4362,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -5042,10 +4374,10 @@ "NhwcConv_17_out-/decoder/up_blocks.1/resnets.2/conv1/Conv_output_0.out0_1_17" ], "const_args": [ - "GroupNorm_16_wts_4_1_15" + "GroupNorm_16_wts_4_0_15" ], "out_args": [ - "GroupNorm_16.out9_16.out4_1_15" + "GroupNorm_16.out12_16.out4_0_15" ], "attrs": { "epsilon": { @@ -5121,6 +4453,12 @@ "512" ] }, + "nonlinear": { + "type": "str", + "value": [ + "Silu" + ] + }, "ctrl_packet": { "type": "int", "value": [ @@ -5130,92 +4468,35 @@ } }, { - "name": "Sigmoid_15", - "type": "SDSilu", + "name": "NhwcConv_18-/decoder/up_blocks.1/resnets.2/conv2/Conv", + "type": "SDConv", "in_args": [ - "GroupNorm_16.out9_16.out4_1_15" + "GroupNorm_16.out12_16.out4_0_15" ], "const_args": [ - "Sigmoid_15.weights6_15" + "NhwcConv_18_weight_NHWC" ], "out_args": [ - "GroupNorm_16_sigmoid_out.9_16.out6_15" + "NhwcConv_18_out-/decoder/up_blocks.1/resnets.2/conv2/Conv_output_0.out0_1_18" ], "attrs": { - "input_shape": { - "type": "int", + "auto_pad": { + "type": "str", "value": [ - "1", - "128", - "128", - "512" + "NOTSET" ] }, - "output_shape": { + "dilations": { "type": "int", "value": [ "1", - "128", - "128", - "512" + "1" ] }, - "in_dtypes": { - "type": "str", + "group": { + "type": "int", "value": [ - "bfloat16" - ] - }, - "out_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" - ] - }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "NhwcConv_18-/decoder/up_blocks.1/resnets.2/conv2/Conv", - "type": "SDConv", - "in_args": [ - "GroupNorm_16_sigmoid_out.9_16.out6_15" - ], - "const_args": [ - "NhwcConv_18_weight_NHWC" - ], - "out_args": [ - "NhwcConv_18_out-/decoder/up_blocks.1/resnets.2/conv2/Conv_output_0.out0_1_18" - ], - "attrs": { - "auto_pad": { - "type": "str", - "value": [ - "NOTSET" - ] - }, - "dilations": { - "type": "int", - "value": [ - "1", - "1" - ] - }, - "group": { - "type": "int", - "value": [ - "1" + "1" ] }, "kernel_shape": { @@ -5285,8 +4566,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -5330,6 +4610,19 @@ "512" ] }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16", + "bfloat16" + ] + }, + "out_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, "is_bias_add": { "type": "int", "value": [ @@ -5354,7 +4647,7 @@ "/decoder/up_blocks.1/upsamplers.0/Resize.weights7_1" ], "out_args": [ - "/decoder/up_blocks.1/upsamplers.0/Resize_output_0.nhwc2_1.out_7_1" + "/decoder/up_blocks.1/upsamplers.0/Resize_output_0.nhwc5_1.out_7_1" ], "attrs": { "a_shape": { @@ -5375,6 +4668,18 @@ "512" ] }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, + "out_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, "ctrl_packet": { "type": "int", "value": [ @@ -5387,7 +4692,7 @@ "name": "NhwcConv_19-/decoder/up_blocks.1/upsamplers.0/conv/Conv", "type": "SDConv", "in_args": [ - "/decoder/up_blocks.1/upsamplers.0/Resize_output_0.nhwc2_1.out_7_1" + "/decoder/up_blocks.1/upsamplers.0/Resize_output_0.nhwc5_1.out_7_1" ], "const_args": [ "NhwcConv_19_weight_NHWC" @@ -5482,8 +4787,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -5495,10 +4799,10 @@ "NhwcConv_19_out-/decoder/up_blocks.1/upsamplers.0/conv/Conv_output_0.out0_1_19" ], "const_args": [ - "GroupNorm_17_wts_4_1_16" + "GroupNorm_17_wts_4_0_16" ], "out_args": [ - "GroupNorm_17.out9_17.out4_1_16" + "GroupNorm_17.out12_17.out4_0_16" ], "attrs": { "epsilon": { @@ -5574,6 +4878,12 @@ "512" ] }, + "nonlinear": { + "type": "str", + "value": [ + "Silu" + ] + }, "ctrl_packet": { "type": "int", "value": [ @@ -5681,65 +4991,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" - ] - } - } - }, - { - "name": "Sigmoid_16", - "type": "SDSilu", - "in_args": [ - "GroupNorm_17.out9_17.out4_1_16" - ], - "const_args": [ - "Sigmoid_16.weights6_16" - ], - "out_args": [ - "GroupNorm_17_sigmoid_out.9_17.out6_16" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "256", - "256", - "512" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "256", - "256", - "512" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" - ] - }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" + "float" ] } } @@ -5748,7 +5000,7 @@ "name": "NhwcConv_21-/decoder/up_blocks.2/resnets.0/conv1/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_17_sigmoid_out.9_17.out6_16" + "GroupNorm_17.out12_17.out4_0_16" ], "const_args": [ "NhwcConv_21_weight_NHWC" @@ -5843,8 +5095,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -5856,10 +5107,10 @@ "NhwcConv_21_out-/decoder/up_blocks.2/resnets.0/conv1/Conv_output_0.out0_1_21" ], "const_args": [ - "GroupNorm_18_wts_4_1_17" + "GroupNorm_18_wts_4_0_17" ], "out_args": [ - "GroupNorm_18.out9_18.out4_1_17" + "GroupNorm_18.out12_18.out4_0_17" ], "attrs": { "epsilon": { @@ -5935,61 +5186,10 @@ "256" ] }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_17", - "type": "SDSilu", - "in_args": [ - "GroupNorm_18.out9_18.out4_1_17" - ], - "const_args": [ - "Sigmoid_17.weights6_17" - ], - "out_args": [ - "GroupNorm_18_sigmoid_out.9_18.out6_17" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "256", - "256", - "256" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "256", - "256", - "256" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { + "nonlinear": { "type": "str", "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + "Silu" ] }, "ctrl_packet": { @@ -6004,7 +5204,7 @@ "name": "NhwcConv_22-/decoder/up_blocks.2/resnets.0/conv2/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_18_sigmoid_out.9_18.out6_17" + "GroupNorm_18.out12_18.out4_0_17" ], "const_args": [ "NhwcConv_22_weight_NHWC" @@ -6099,8 +5299,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -6144,6 +5343,19 @@ "256" ] }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16", + "bfloat16" + ] + }, + "out_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, "is_bias_add": { "type": "int", "value": [ @@ -6165,10 +5377,10 @@ "/decoder/up_blocks.2/resnets.0/Add.out_2_1_9" ], "const_args": [ - "GroupNorm_19_wts_4_1_18" + "GroupNorm_19_wts_4_0_18" ], "out_args": [ - "GroupNorm_19.out9_19.out4_1_18" + "GroupNorm_19.out12_19.out4_0_18" ], "attrs": { "epsilon": { @@ -6244,61 +5456,10 @@ "256" ] }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_18", - "type": "SDSilu", - "in_args": [ - "GroupNorm_19.out9_19.out4_1_18" - ], - "const_args": [ - "Sigmoid_18.weights6_18" - ], - "out_args": [ - "GroupNorm_19_sigmoid_out.9_19.out6_18" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "256", - "256", - "256" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "256", - "256", - "256" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { + "nonlinear": { "type": "str", "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + "Silu" ] }, "ctrl_packet": { @@ -6313,7 +5474,7 @@ "name": "NhwcConv_23-/decoder/up_blocks.2/resnets.1/conv1/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_19_sigmoid_out.9_19.out6_18" + "GroupNorm_19.out12_19.out4_0_18" ], "const_args": [ "NhwcConv_23_weight_NHWC" @@ -6408,8 +5569,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -6421,10 +5581,10 @@ "NhwcConv_23_out-/decoder/up_blocks.2/resnets.1/conv1/Conv_output_0.out0_1_23" ], "const_args": [ - "GroupNorm_20_wts_4_1_19" + "GroupNorm_20_wts_4_0_19" ], "out_args": [ - "GroupNorm_20.out9_20.out4_1_19" + "GroupNorm_20.out12_20.out4_0_19" ], "attrs": { "epsilon": { @@ -6500,6 +5660,12 @@ "256" ] }, + "nonlinear": { + "type": "str", + "value": [ + "Silu" + ] + }, "ctrl_packet": { "type": "int", "value": [ @@ -6509,73 +5675,16 @@ } }, { - "name": "Sigmoid_19", - "type": "SDSilu", + "name": "NhwcConv_24-/decoder/up_blocks.2/resnets.1/conv2/Conv", + "type": "SDConv", "in_args": [ - "GroupNorm_20.out9_20.out4_1_19" + "GroupNorm_20.out12_20.out4_0_19" ], "const_args": [ - "Sigmoid_19.weights6_19" + "NhwcConv_24_weight_NHWC" ], "out_args": [ - "GroupNorm_20_sigmoid_out.9_20.out6_19" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "256", - "256", - "256" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "256", - "256", - "256" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" - ] - }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "NhwcConv_24-/decoder/up_blocks.2/resnets.1/conv2/Conv", - "type": "SDConv", - "in_args": [ - "GroupNorm_20_sigmoid_out.9_20.out6_19" - ], - "const_args": [ - "NhwcConv_24_weight_NHWC" - ], - "out_args": [ - "NhwcConv_24_out-/decoder/up_blocks.2/resnets.1/conv2/Conv_output_0.out0_1_24" + "NhwcConv_24_out-/decoder/up_blocks.2/resnets.1/conv2/Conv_output_0.out0_1_24" ], "attrs": { "auto_pad": { @@ -6664,8 +5773,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -6709,6 +5817,19 @@ "256" ] }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16", + "bfloat16" + ] + }, + "out_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, "is_bias_add": { "type": "int", "value": [ @@ -6730,10 +5851,10 @@ "/decoder/up_blocks.2/resnets.1/Add.out_2_1_10" ], "const_args": [ - "GroupNorm_21_wts_4_1_20" + "GroupNorm_21_wts_4_0_20" ], "out_args": [ - "GroupNorm_21.out9_21.out4_1_20" + "GroupNorm_21.out12_21.out4_0_20" ], "attrs": { "epsilon": { @@ -6809,61 +5930,10 @@ "256" ] }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_20", - "type": "SDSilu", - "in_args": [ - "GroupNorm_21.out9_21.out4_1_20" - ], - "const_args": [ - "Sigmoid_20.weights6_20" - ], - "out_args": [ - "GroupNorm_21_sigmoid_out.9_21.out6_20" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "256", - "256", - "256" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "256", - "256", - "256" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { + "nonlinear": { "type": "str", "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + "Silu" ] }, "ctrl_packet": { @@ -6878,7 +5948,7 @@ "name": "NhwcConv_25-/decoder/up_blocks.2/resnets.2/conv1/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_21_sigmoid_out.9_21.out6_20" + "GroupNorm_21.out12_21.out4_0_20" ], "const_args": [ "NhwcConv_25_weight_NHWC" @@ -6973,8 +6043,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -6986,10 +6055,10 @@ "NhwcConv_25_out-/decoder/up_blocks.2/resnets.2/conv1/Conv_output_0.out0_1_25" ], "const_args": [ - "GroupNorm_22_wts_4_1_21" + "GroupNorm_22_wts_4_0_21" ], "out_args": [ - "GroupNorm_22.out9_22.out4_1_21" + "GroupNorm_22.out12_22.out4_0_21" ], "attrs": { "epsilon": { @@ -7065,61 +6134,10 @@ "256" ] }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_21", - "type": "SDSilu", - "in_args": [ - "GroupNorm_22.out9_22.out4_1_21" - ], - "const_args": [ - "Sigmoid_21.weights6_21" - ], - "out_args": [ - "GroupNorm_22_sigmoid_out.9_22.out6_21" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "256", - "256", - "256" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "256", - "256", - "256" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { + "nonlinear": { "type": "str", "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + "Silu" ] }, "ctrl_packet": { @@ -7134,7 +6152,7 @@ "name": "NhwcConv_26-/decoder/up_blocks.2/resnets.2/conv2/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_22_sigmoid_out.9_22.out6_21" + "GroupNorm_22.out12_22.out4_0_21" ], "const_args": [ "NhwcConv_26_weight_NHWC" @@ -7229,8 +6247,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -7274,6 +6291,19 @@ "256" ] }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16", + "bfloat16" + ] + }, + "out_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, "is_bias_add": { "type": "int", "value": [ @@ -7298,7 +6328,7 @@ "/decoder/up_blocks.2/upsamplers.0/Resize.weights7_2" ], "out_args": [ - "/decoder/up_blocks.2/upsamplers.0/Resize_output_0.nhwc2_2.out_7_2" + "/decoder/up_blocks.2/upsamplers.0/Resize_output_0.nhwc5_2.out_7_2" ], "attrs": { "a_shape": { @@ -7319,6 +6349,18 @@ "256" ] }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, + "out_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, "ctrl_packet": { "type": "int", "value": [ @@ -7331,7 +6373,7 @@ "name": "NhwcConv_27-/decoder/up_blocks.2/upsamplers.0/conv/Conv", "type": "SDConv", "in_args": [ - "/decoder/up_blocks.2/upsamplers.0/Resize_output_0.nhwc2_2.out_7_2" + "/decoder/up_blocks.2/upsamplers.0/Resize_output_0.nhwc5_2.out_7_2" ], "const_args": [ "NhwcConv_27_weight_NHWC" @@ -7426,8 +6468,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -7439,10 +6480,10 @@ "NhwcConv_27_out-/decoder/up_blocks.2/upsamplers.0/conv/Conv_output_0.out0_1_27" ], "const_args": [ - "GroupNorm_23_wts_4_1_22" + "GroupNorm_23_wts_4_0_22" ], "out_args": [ - "GroupNorm_23.out9_23.out4_1_22" + "GroupNorm_23.out12_23.out4_0_22" ], "attrs": { "epsilon": { @@ -7518,6 +6559,12 @@ "256" ] }, + "nonlinear": { + "type": "str", + "value": [ + "Silu" + ] + }, "ctrl_packet": { "type": "int", "value": [ @@ -7625,65 +6672,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" - ] - } - } - }, - { - "name": "Sigmoid_22", - "type": "SDSilu", - "in_args": [ - "GroupNorm_23.out9_23.out4_1_22" - ], - "const_args": [ - "Sigmoid_22.weights6_22" - ], - "out_args": [ - "GroupNorm_23_sigmoid_out.9_23.out6_22" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "512", - "512", - "256" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "512", - "512", - "256" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" - ] - }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" + "float" ] } } @@ -7692,7 +6681,7 @@ "name": "NhwcConv_29-/decoder/up_blocks.3/resnets.0/conv1/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_23_sigmoid_out.9_23.out6_22" + "GroupNorm_23.out12_23.out4_0_22" ], "const_args": [ "NhwcConv_29_weight_NHWC" @@ -7787,8 +6776,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -7800,10 +6788,10 @@ "NhwcConv_29_out-/decoder/up_blocks.3/resnets.0/conv1/Conv_output_0.out0_1_29" ], "const_args": [ - "GroupNorm_24_wts_4_1_23" + "GroupNorm_24_wts_4_0_23" ], "out_args": [ - "GroupNorm_24.out9_24.out4_1_23" + "GroupNorm_24.out12_24.out4_0_23" ], "attrs": { "epsilon": { @@ -7877,63 +6865,12 @@ "type": "int", "value": [ "128" - ] - }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_23", - "type": "SDSilu", - "in_args": [ - "GroupNorm_24.out9_24.out4_1_23" - ], - "const_args": [ - "Sigmoid_23.weights6_23" - ], - "out_args": [ - "GroupNorm_24_sigmoid_out.9_24.out6_23" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "512", - "512", - "128" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "512", - "512", - "128" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + ] + }, + "nonlinear": { + "type": "str", + "value": [ + "Silu" ] }, "ctrl_packet": { @@ -7948,7 +6885,7 @@ "name": "NhwcConv_30-/decoder/up_blocks.3/resnets.0/conv2/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_24_sigmoid_out.9_24.out6_23" + "GroupNorm_24.out12_24.out4_0_23" ], "const_args": [ "NhwcConv_30_weight_NHWC" @@ -8043,8 +6980,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -8088,6 +7024,19 @@ "128" ] }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16", + "bfloat16" + ] + }, + "out_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, "is_bias_add": { "type": "int", "value": [ @@ -8109,10 +7058,10 @@ "/decoder/up_blocks.3/resnets.0/Add.out_2_1_12" ], "const_args": [ - "GroupNorm_25_wts_4_1_24" + "GroupNorm_25_wts_4_0_24" ], "out_args": [ - "GroupNorm_25.out9_25.out4_1_24" + "GroupNorm_25.out12_25.out4_0_24" ], "attrs": { "epsilon": { @@ -8188,61 +7137,10 @@ "128" ] }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_24", - "type": "SDSilu", - "in_args": [ - "GroupNorm_25.out9_25.out4_1_24" - ], - "const_args": [ - "Sigmoid_24.weights6_24" - ], - "out_args": [ - "GroupNorm_25_sigmoid_out.9_25.out6_24" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "512", - "512", - "128" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "512", - "512", - "128" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { + "nonlinear": { "type": "str", "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + "Silu" ] }, "ctrl_packet": { @@ -8257,7 +7155,7 @@ "name": "NhwcConv_31-/decoder/up_blocks.3/resnets.1/conv1/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_25_sigmoid_out.9_25.out6_24" + "GroupNorm_25.out12_25.out4_0_24" ], "const_args": [ "NhwcConv_31_weight_NHWC" @@ -8352,8 +7250,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -8365,10 +7262,10 @@ "NhwcConv_31_out-/decoder/up_blocks.3/resnets.1/conv1/Conv_output_0.out0_1_31" ], "const_args": [ - "GroupNorm_26_wts_4_1_25" + "GroupNorm_26_wts_4_0_25" ], "out_args": [ - "GroupNorm_26.out9_26.out4_1_25" + "GroupNorm_26.out12_26.out4_0_25" ], "attrs": { "epsilon": { @@ -8444,61 +7341,10 @@ "128" ] }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_25", - "type": "SDSilu", - "in_args": [ - "GroupNorm_26.out9_26.out4_1_25" - ], - "const_args": [ - "Sigmoid_25.weights6_25" - ], - "out_args": [ - "GroupNorm_26_sigmoid_out.9_26.out6_25" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "512", - "512", - "128" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "512", - "512", - "128" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { + "nonlinear": { "type": "str", "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + "Silu" ] }, "ctrl_packet": { @@ -8513,7 +7359,7 @@ "name": "NhwcConv_32-/decoder/up_blocks.3/resnets.1/conv2/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_26_sigmoid_out.9_26.out6_25" + "GroupNorm_26.out12_26.out4_0_25" ], "const_args": [ "NhwcConv_32_weight_NHWC" @@ -8608,8 +7454,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -8653,6 +7498,19 @@ "128" ] }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16", + "bfloat16" + ] + }, + "out_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, "is_bias_add": { "type": "int", "value": [ @@ -8674,10 +7532,10 @@ "/decoder/up_blocks.3/resnets.1/Add.out_2_1_13" ], "const_args": [ - "GroupNorm_27_wts_4_1_26" + "GroupNorm_27_wts_4_0_26" ], "out_args": [ - "GroupNorm_27.out9_27.out4_1_26" + "GroupNorm_27.out12_27.out4_0_26" ], "attrs": { "epsilon": { @@ -8753,61 +7611,10 @@ "128" ] }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_26", - "type": "SDSilu", - "in_args": [ - "GroupNorm_27.out9_27.out4_1_26" - ], - "const_args": [ - "Sigmoid_26.weights6_26" - ], - "out_args": [ - "GroupNorm_27_sigmoid_out.9_27.out6_26" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "512", - "512", - "128" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "512", - "512", - "128" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { + "nonlinear": { "type": "str", "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + "Silu" ] }, "ctrl_packet": { @@ -8822,7 +7629,7 @@ "name": "NhwcConv_33-/decoder/up_blocks.3/resnets.2/conv1/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_27_sigmoid_out.9_27.out6_26" + "GroupNorm_27.out12_27.out4_0_26" ], "const_args": [ "NhwcConv_33_weight_NHWC" @@ -8917,8 +7724,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -8930,10 +7736,10 @@ "NhwcConv_33_out-/decoder/up_blocks.3/resnets.2/conv1/Conv_output_0.out0_1_33" ], "const_args": [ - "GroupNorm_28_wts_4_1_27" + "GroupNorm_28_wts_4_0_27" ], "out_args": [ - "GroupNorm_28.out9_28.out4_1_27" + "GroupNorm_28.out12_28.out4_0_27" ], "attrs": { "epsilon": { @@ -9009,6 +7815,12 @@ "128" ] }, + "nonlinear": { + "type": "str", + "value": [ + "Silu" + ] + }, "ctrl_packet": { "type": "int", "value": [ @@ -9018,92 +7830,35 @@ } }, { - "name": "Sigmoid_27", - "type": "SDSilu", + "name": "NhwcConv_34-/decoder/up_blocks.3/resnets.2/conv2/Conv", + "type": "SDConv", "in_args": [ - "GroupNorm_28.out9_28.out4_1_27" + "GroupNorm_28.out12_28.out4_0_27" ], "const_args": [ - "Sigmoid_27.weights6_27" + "NhwcConv_34_weight_NHWC" ], "out_args": [ - "GroupNorm_28_sigmoid_out.9_28.out6_27" + "NhwcConv_34_out-/decoder/up_blocks.3/resnets.2/conv2/Conv_output_0.out0_1_34" ], "attrs": { - "input_shape": { - "type": "int", + "auto_pad": { + "type": "str", "value": [ - "1", - "512", - "512", - "128" + "NOTSET" ] }, - "output_shape": { + "dilations": { "type": "int", "value": [ "1", - "512", - "512", - "128" + "1" ] }, - "in_dtypes": { - "type": "str", + "group": { + "type": "int", "value": [ - "bfloat16" - ] - }, - "out_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" - ] - }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "NhwcConv_34-/decoder/up_blocks.3/resnets.2/conv2/Conv", - "type": "SDConv", - "in_args": [ - "GroupNorm_28_sigmoid_out.9_28.out6_27" - ], - "const_args": [ - "NhwcConv_34_weight_NHWC" - ], - "out_args": [ - "NhwcConv_34_out-/decoder/up_blocks.3/resnets.2/conv2/Conv_output_0.out0_1_34" - ], - "attrs": { - "auto_pad": { - "type": "str", - "value": [ - "NOTSET" - ] - }, - "dilations": { - "type": "int", - "value": [ - "1", - "1" - ] - }, - "group": { - "type": "int", - "value": [ - "1" + "1" ] }, "kernel_shape": { @@ -9173,8 +7928,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -9218,6 +7972,19 @@ "128" ] }, + "in_dtypes": { + "type": "str", + "value": [ + "bfloat16", + "bfloat16" + ] + }, + "out_dtypes": { + "type": "str", + "value": [ + "bfloat16" + ] + }, "is_bias_add": { "type": "int", "value": [ @@ -9239,10 +8006,10 @@ "/decoder/up_blocks.3/resnets.2/Add.out_2_1_14" ], "const_args": [ - "GroupNorm_29_wts_4_1_28" + "GroupNorm_29_wts_4_0_28" ], "out_args": [ - "GroupNorm_29.out9_29.out4_1_28" + "GroupNorm_29.out12_29.out4_0_28" ], "attrs": { "epsilon": { @@ -9318,61 +8085,10 @@ "128" ] }, - "ctrl_packet": { - "type": "int", - "value": [ - "1" - ] - } - } - }, - { - "name": "Sigmoid_28", - "type": "SDSilu", - "in_args": [ - "GroupNorm_29.out9_29.out4_1_28" - ], - "const_args": [ - "Sigmoid_28.weights6_28" - ], - "out_args": [ - "GroupNorm_29_sigmoid_out.9_29.out6_28" - ], - "attrs": { - "input_shape": { - "type": "int", - "value": [ - "1", - "512", - "512", - "128" - ] - }, - "output_shape": { - "type": "int", - "value": [ - "1", - "512", - "512", - "128" - ] - }, - "in_dtypes": { - "type": "str", - "value": [ - "bfloat16" - ] - }, - "out_dtypes": { + "nonlinear": { "type": "str", "value": [ - "bfloat16" - ] - }, - "weight_shape": { - "type": "int", - "value": [ - "128" + "Silu" ] }, "ctrl_packet": { @@ -9387,7 +8103,7 @@ "name": "NhwcConv_35-/decoder/conv_out/Conv", "type": "SDConv", "in_args": [ - "GroupNorm_29_sigmoid_out.9_29.out6_28" + "GroupNorm_29.out12_29.out4_0_28" ], "const_args": [ "NhwcConv_35_weight_NHWC" @@ -9482,8 +8198,7 @@ "value": [ "bfloat16", "bfp16ebs8", - "float", - "bfloat16" + "float" ] } } @@ -9505,234 +8220,176 @@ ] }, "scratch": { - "buffer_size": 3519053824, + "buffer_size": 2604695552, "xrt_arg_id": 2, "packed_tensors": [ "NhwcConv_0_out-/post_quant_conv/Conv_output_0.out0_1_0", "NhwcConv_1_out-/decoder/conv_in/Conv_output_0.out0_1_1", - "GroupNorm_0.out9_0.out4_1_0", - "GroupNorm_0_sigmoid_out.9_0.out6_0", + "GroupNorm_0.out12_0.out4_0_0", "NhwcConv_2_out-/decoder/mid_block/resnets.0/conv1/Conv_output_0.out0_1_2", - "GroupNorm_1.out9_1.out4_1_1", - "GroupNorm_1_sigmoid_out.9_1.out6_1", + "GroupNorm_1.out12_1.out4_0_1", "NhwcConv_3_out-/decoder/mid_block/resnets.0/conv2/Conv_output_0.out0_1_3", "/decoder/mid_block/resnets.0/Add.out_2_1_0", - "/decoder/mid_block/attentions.0/group_norm/Add_output_0_4d_NHWC.out4_0_0", - "/decoder/mid_block/attentions.0/to_k/Add_output_0.out1_3_0", - "/decoder/mid_block/attentions.0/to_q/Add_output_0.out1_3_1", - "/decoder/mid_block/attentions.0/to_v/Add_output_0.out1_3_2", - "/decoder/mid_block/attentions.0/Reshape_4_output_0.out10_0", - "/decoder/mid_block/attentions.0/Reshape_5_output_0.nhwc3_0.out1_0_0", + "/decoder/mid_block/attentions.0/group_norm/Add_output_0_4d_NHWC.out4_1_0", + "/decoder/mid_block/attentions.0/Reshape_2_output_0_3d.out1_3_0", + "/decoder/mid_block/attentions.0/Reshape_1_output_0_3d.out1_3_1", + "/decoder/mid_block/attentions.0/Reshape_3_output_0_3d.out1_3_2", + "/decoder/mid_block/attentions.0/Transpose_5_output_0_3d.out10_0", + "/decoder/mid_block/attentions.0/Reshape_5_output_0.nhwc6_0.out1_0_0", "/decoder/mid_block/attentions.0/Add_output_0_NHWC.out_2_1_1", - "GroupNorm_3.out9_3.out4_1_2", - "GroupNorm_3_sigmoid_out.9_3.out6_2", + "GroupNorm_3.out12_3.out4_0_2", "NhwcConv_4_out-/decoder/mid_block/resnets.1/conv1/Conv_output_0.out0_1_4", - "GroupNorm_4.out9_4.out4_1_3", - "GroupNorm_4_sigmoid_out.9_4.out6_3", + "GroupNorm_4.out12_4.out4_0_3", "NhwcConv_5_out-/decoder/mid_block/resnets.1/conv2/Conv_output_0.out0_1_5", "/decoder/mid_block/resnets.1/Add_output_0_NHWC.out_2_1_2", - "GroupNorm_5.out9_5.out4_1_4", - "GroupNorm_5_sigmoid_out.9_5.out6_4", + "GroupNorm_5.out12_5.out4_0_4", "NhwcConv_6_out-/decoder/up_blocks.0/resnets.0/conv1/Conv_output_0.out0_1_6", - "GroupNorm_6.out9_6.out4_1_5", - "GroupNorm_6_sigmoid_out.9_6.out6_5", + "GroupNorm_6.out12_6.out4_0_5", "NhwcConv_7_out-/decoder/up_blocks.0/resnets.0/conv2/Conv_output_0.out0_1_7", "/decoder/up_blocks.0/resnets.0/Add_output_0_NHWC.out_2_1_3", - "GroupNorm_7.out9_7.out4_1_6", - "GroupNorm_7_sigmoid_out.9_7.out6_6", + "GroupNorm_7.out12_7.out4_0_6", "NhwcConv_8_out-/decoder/up_blocks.0/resnets.1/conv1/Conv_output_0.out0_1_8", - "GroupNorm_8.out9_8.out4_1_7", - "GroupNorm_8_sigmoid_out.9_8.out6_7", + "GroupNorm_8.out12_8.out4_0_7", "NhwcConv_9_out-/decoder/up_blocks.0/resnets.1/conv2/Conv_output_0.out0_1_9", "/decoder/up_blocks.0/resnets.1/Add_output_0_NHWC.out_2_1_4", - "GroupNorm_9.out9_9.out4_1_8", - "GroupNorm_9_sigmoid_out.9_9.out6_8", + "GroupNorm_9.out12_9.out4_0_8", "NhwcConv_10_out-/decoder/up_blocks.0/resnets.2/conv1/Conv_output_0.out0_1_10", - "GroupNorm_10.out9_10.out4_1_9", - "GroupNorm_10_sigmoid_out.9_10.out6_9", + "GroupNorm_10.out12_10.out4_0_9", "NhwcConv_11_out-/decoder/up_blocks.0/resnets.2/conv2/Conv_output_0.out0_1_11", - "/decoder/up_blocks.0/resnets.2/Add_output_0.nhwc2_0.out_2_1_5", - "/decoder/up_blocks.0/upsamplers.0/Resize_output_0.nhwc2_0.out_7_0", + "/decoder/up_blocks.0/resnets.2/Add_output_0.nhwc5_0.out_2_1_5", + "/decoder/up_blocks.0/upsamplers.0/Resize_output_0.nhwc5_0.out_7_0", "NhwcConv_12_out-/decoder/up_blocks.0/upsamplers.0/conv/Conv_output_0.out0_1_12", - "GroupNorm_11.out9_11.out4_1_10", - "GroupNorm_11_sigmoid_out.9_11.out6_10", + "GroupNorm_11.out12_11.out4_0_10", "NhwcConv_13_out-/decoder/up_blocks.1/resnets.0/conv1/Conv_output_0.out0_1_13", - "GroupNorm_12.out9_12.out4_1_11", - "GroupNorm_12_sigmoid_out.9_12.out6_11", + "GroupNorm_12.out12_12.out4_0_11", "NhwcConv_14_out-/decoder/up_blocks.1/resnets.0/conv2/Conv_output_0.out0_1_14", "/decoder/up_blocks.1/resnets.0/Add.out_2_1_6", - "GroupNorm_13.out9_13.out4_1_12", - "GroupNorm_13_sigmoid_out.9_13.out6_12", + "GroupNorm_13.out12_13.out4_0_12", "NhwcConv_15_out-/decoder/up_blocks.1/resnets.1/conv1/Conv_output_0.out0_1_15", - "GroupNorm_14.out9_14.out4_1_13", - "GroupNorm_14_sigmoid_out.9_14.out6_13", + "GroupNorm_14.out12_14.out4_0_13", "NhwcConv_16_out-/decoder/up_blocks.1/resnets.1/conv2/Conv_output_0.out0_1_16", "/decoder/up_blocks.1/resnets.1/Add.out_2_1_7", - "GroupNorm_15.out9_15.out4_1_14", - "GroupNorm_15_sigmoid_out.9_15.out6_14", + "GroupNorm_15.out12_15.out4_0_14", "NhwcConv_17_out-/decoder/up_blocks.1/resnets.2/conv1/Conv_output_0.out0_1_17", - "GroupNorm_16.out9_16.out4_1_15", - "GroupNorm_16_sigmoid_out.9_16.out6_15", + "GroupNorm_16.out12_16.out4_0_15", "NhwcConv_18_out-/decoder/up_blocks.1/resnets.2/conv2/Conv_output_0.out0_1_18", "/decoder/up_blocks.1/resnets.2/Add.out_2_1_8", - "/decoder/up_blocks.1/upsamplers.0/Resize_output_0.nhwc2_1.out_7_1", + "/decoder/up_blocks.1/upsamplers.0/Resize_output_0.nhwc5_1.out_7_1", "NhwcConv_19_out-/decoder/up_blocks.1/upsamplers.0/conv/Conv_output_0.out0_1_19", - "GroupNorm_17.out9_17.out4_1_16", + "GroupNorm_17.out12_17.out4_0_16", "NhwcConv_20_out-/decoder/up_blocks.2/resnets.0/conv_shortcut/Conv_output_0.out0_1_20", - "GroupNorm_17_sigmoid_out.9_17.out6_16", "NhwcConv_21_out-/decoder/up_blocks.2/resnets.0/conv1/Conv_output_0.out0_1_21", - "GroupNorm_18.out9_18.out4_1_17", - "GroupNorm_18_sigmoid_out.9_18.out6_17", + "GroupNorm_18.out12_18.out4_0_17", "NhwcConv_22_out-/decoder/up_blocks.2/resnets.0/conv2/Conv_output_0.out0_1_22", "/decoder/up_blocks.2/resnets.0/Add.out_2_1_9", - "GroupNorm_19.out9_19.out4_1_18", - "GroupNorm_19_sigmoid_out.9_19.out6_18", + "GroupNorm_19.out12_19.out4_0_18", "NhwcConv_23_out-/decoder/up_blocks.2/resnets.1/conv1/Conv_output_0.out0_1_23", - "GroupNorm_20.out9_20.out4_1_19", - "GroupNorm_20_sigmoid_out.9_20.out6_19", + "GroupNorm_20.out12_20.out4_0_19", "NhwcConv_24_out-/decoder/up_blocks.2/resnets.1/conv2/Conv_output_0.out0_1_24", "/decoder/up_blocks.2/resnets.1/Add.out_2_1_10", - "GroupNorm_21.out9_21.out4_1_20", - "GroupNorm_21_sigmoid_out.9_21.out6_20", + "GroupNorm_21.out12_21.out4_0_20", "NhwcConv_25_out-/decoder/up_blocks.2/resnets.2/conv1/Conv_output_0.out0_1_25", - "GroupNorm_22.out9_22.out4_1_21", - "GroupNorm_22_sigmoid_out.9_22.out6_21", + "GroupNorm_22.out12_22.out4_0_21", "NhwcConv_26_out-/decoder/up_blocks.2/resnets.2/conv2/Conv_output_0.out0_1_26", "/decoder/up_blocks.2/resnets.2/Add.out_2_1_11", - "/decoder/up_blocks.2/upsamplers.0/Resize_output_0.nhwc2_2.out_7_2", + "/decoder/up_blocks.2/upsamplers.0/Resize_output_0.nhwc5_2.out_7_2", "NhwcConv_27_out-/decoder/up_blocks.2/upsamplers.0/conv/Conv_output_0.out0_1_27", - "GroupNorm_23.out9_23.out4_1_22", + "GroupNorm_23.out12_23.out4_0_22", "NhwcConv_28_out-/decoder/up_blocks.3/resnets.0/conv_shortcut/Conv_output_0.out0_1_28", - "GroupNorm_23_sigmoid_out.9_23.out6_22", "NhwcConv_29_out-/decoder/up_blocks.3/resnets.0/conv1/Conv_output_0.out0_1_29", - "GroupNorm_24.out9_24.out4_1_23", - "GroupNorm_24_sigmoid_out.9_24.out6_23", + "GroupNorm_24.out12_24.out4_0_23", "NhwcConv_30_out-/decoder/up_blocks.3/resnets.0/conv2/Conv_output_0.out0_1_30", "/decoder/up_blocks.3/resnets.0/Add.out_2_1_12", - "GroupNorm_25.out9_25.out4_1_24", - "GroupNorm_25_sigmoid_out.9_25.out6_24", + "GroupNorm_25.out12_25.out4_0_24", "NhwcConv_31_out-/decoder/up_blocks.3/resnets.1/conv1/Conv_output_0.out0_1_31", - "GroupNorm_26.out9_26.out4_1_25", - "GroupNorm_26_sigmoid_out.9_26.out6_25", + "GroupNorm_26.out12_26.out4_0_25", "NhwcConv_32_out-/decoder/up_blocks.3/resnets.1/conv2/Conv_output_0.out0_1_32", "/decoder/up_blocks.3/resnets.1/Add.out_2_1_13", - "GroupNorm_27.out9_27.out4_1_26", - "GroupNorm_27_sigmoid_out.9_27.out6_26", + "GroupNorm_27.out12_27.out4_0_26", "NhwcConv_33_out-/decoder/up_blocks.3/resnets.2/conv1/Conv_output_0.out0_1_33", - "GroupNorm_28.out9_28.out4_1_27", - "GroupNorm_28_sigmoid_out.9_28.out6_27", + "GroupNorm_28.out12_28.out4_0_27", "NhwcConv_34_out-/decoder/up_blocks.3/resnets.2/conv2/Conv_output_0.out0_1_34", "/decoder/up_blocks.3/resnets.2/Add.out_2_1_14", - "GroupNorm_29.out9_29.out4_1_28", - "GroupNorm_29_sigmoid_out.9_29.out6_28" + "GroupNorm_29.out12_29.out4_0_28" ] }, "const": { - "buffer_size": 58459280, + "buffer_size": 61089680, "xrt_arg_id": 3, "packed_tensors": [ "NhwcConv_0_weight_NHWC", "NhwcConv_1_weight_NHWC", - "GroupNorm_0_wts_4_1_0", - "Sigmoid_0.weights6_0", + "GroupNorm_0_wts_4_0_0", "NhwcConv_2_weight_NHWC", - "GroupNorm_1_wts_4_1_1", - "Sigmoid_1.weights6_1", + "GroupNorm_1_wts_4_0_1", "NhwcConv_3_weight_NHWC", - "GroupNorm_2_wts_4_0_0", - "onnx::MatMul_918", - "onnx::MatMul_917", - "onnx::MatMul_919", - "/decoder/mid_block/attentions.0/MatMulmha_1_0_mask.10_0", - "onnx::MatMul_927", - "GroupNorm_3_wts_4_1_2", - "Sigmoid_2.weights6_2", + "GroupNorm_2_wts_4_1_0", + "onnx::MatMul_918_2_0_0", + "onnx::MatMul_917_2_0_1", + "onnx::MatMul_919_2_0_2", + "/decoder/mid_block/attentions.0/MatMul/MatMulScaleFusion/_FusedActivationmha_3_1_0_mask.10_0", + "onnx::MatMul_927_2_0_3", + "GroupNorm_3_wts_4_0_2", "NhwcConv_4_weight_NHWC", - "GroupNorm_4_wts_4_1_3", - "Sigmoid_3.weights6_3", + "GroupNorm_4_wts_4_0_3", "NhwcConv_5_weight_NHWC", - "GroupNorm_5_wts_4_1_4", - "Sigmoid_4.weights6_4", + "GroupNorm_5_wts_4_0_4", "NhwcConv_6_weight_NHWC", - "GroupNorm_6_wts_4_1_5", - "Sigmoid_5.weights6_5", + "GroupNorm_6_wts_4_0_5", "NhwcConv_7_weight_NHWC", - "GroupNorm_7_wts_4_1_6", - "Sigmoid_6.weights6_6", + "GroupNorm_7_wts_4_0_6", "NhwcConv_8_weight_NHWC", - "GroupNorm_8_wts_4_1_7", - "Sigmoid_7.weights6_7", + "GroupNorm_8_wts_4_0_7", "NhwcConv_9_weight_NHWC", - "GroupNorm_9_wts_4_1_8", - "Sigmoid_8.weights6_8", + "GroupNorm_9_wts_4_0_8", "NhwcConv_10_weight_NHWC", - "GroupNorm_10_wts_4_1_9", - "Sigmoid_9.weights6_9", + "GroupNorm_10_wts_4_0_9", "NhwcConv_11_weight_NHWC", "/decoder/up_blocks.0/upsamplers.0/Resize.weights7_0", "NhwcConv_12_weight_NHWC", - "GroupNorm_11_wts_4_1_10", - "Sigmoid_10.weights6_10", + "GroupNorm_11_wts_4_0_10", "NhwcConv_13_weight_NHWC", - "GroupNorm_12_wts_4_1_11", - "Sigmoid_11.weights6_11", + "GroupNorm_12_wts_4_0_11", "NhwcConv_14_weight_NHWC", - "GroupNorm_13_wts_4_1_12", - "Sigmoid_12.weights6_12", + "GroupNorm_13_wts_4_0_12", "NhwcConv_15_weight_NHWC", - "GroupNorm_14_wts_4_1_13", - "Sigmoid_13.weights6_13", + "GroupNorm_14_wts_4_0_13", "NhwcConv_16_weight_NHWC", - "GroupNorm_15_wts_4_1_14", - "Sigmoid_14.weights6_14", + "GroupNorm_15_wts_4_0_14", "NhwcConv_17_weight_NHWC", - "GroupNorm_16_wts_4_1_15", - "Sigmoid_15.weights6_15", + "GroupNorm_16_wts_4_0_15", "NhwcConv_18_weight_NHWC", "/decoder/up_blocks.1/upsamplers.0/Resize.weights7_1", "NhwcConv_19_weight_NHWC", - "GroupNorm_17_wts_4_1_16", + "GroupNorm_17_wts_4_0_16", "NhwcConv_20_weight_NHWC", - "Sigmoid_16.weights6_16", "NhwcConv_21_weight_NHWC", - "GroupNorm_18_wts_4_1_17", - "Sigmoid_17.weights6_17", + "GroupNorm_18_wts_4_0_17", "NhwcConv_22_weight_NHWC", - "GroupNorm_19_wts_4_1_18", - "Sigmoid_18.weights6_18", + "GroupNorm_19_wts_4_0_18", "NhwcConv_23_weight_NHWC", - "GroupNorm_20_wts_4_1_19", - "Sigmoid_19.weights6_19", + "GroupNorm_20_wts_4_0_19", "NhwcConv_24_weight_NHWC", - "GroupNorm_21_wts_4_1_20", - "Sigmoid_20.weights6_20", + "GroupNorm_21_wts_4_0_20", "NhwcConv_25_weight_NHWC", - "GroupNorm_22_wts_4_1_21", - "Sigmoid_21.weights6_21", + "GroupNorm_22_wts_4_0_21", "NhwcConv_26_weight_NHWC", "/decoder/up_blocks.2/upsamplers.0/Resize.weights7_2", "NhwcConv_27_weight_NHWC", - "GroupNorm_23_wts_4_1_22", + "GroupNorm_23_wts_4_0_22", "NhwcConv_28_weight_NHWC", - "Sigmoid_22.weights6_22", "NhwcConv_29_weight_NHWC", - "GroupNorm_24_wts_4_1_23", - "Sigmoid_23.weights6_23", + "GroupNorm_24_wts_4_0_23", "NhwcConv_30_weight_NHWC", - "GroupNorm_25_wts_4_1_24", - "Sigmoid_24.weights6_24", + "GroupNorm_25_wts_4_0_24", "NhwcConv_31_weight_NHWC", - "GroupNorm_26_wts_4_1_25", - "Sigmoid_25.weights6_25", + "GroupNorm_26_wts_4_0_25", "NhwcConv_32_weight_NHWC", - "GroupNorm_27_wts_4_1_26", - "Sigmoid_26.weights6_26", + "GroupNorm_27_wts_4_0_26", "NhwcConv_33_weight_NHWC", - "GroupNorm_28_wts_4_1_27", - "Sigmoid_27.weights6_27", + "GroupNorm_28_wts_4_0_27", "NhwcConv_34_weight_NHWC", - "GroupNorm_29_wts_4_1_28", - "Sigmoid_28.weights6_28", + "GroupNorm_29_wts_4_0_28", "NhwcConv_35_weight_NHWC" ] }, @@ -9799,7 +8456,7 @@ "op_tensor_size": 4194304, "offset": 32768 }, - "GroupNorm_0.out9_0.out4_1_0": { + "GroupNorm_0.out12_0.out4_0_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -9813,7 +8470,7 @@ "op_tensor_size": 4194304, "offset": 4227072 }, - "GroupNorm_0_sigmoid_out.9_0.out6_0": { + "NhwcConv_2_out-/decoder/mid_block/resnets.0/conv1/Conv_output_0.out0_1_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -9827,7 +8484,7 @@ "op_tensor_size": 4194304, "offset": 8421376 }, - "NhwcConv_2_out-/decoder/mid_block/resnets.0/conv1/Conv_output_0.out0_1_2": { + "GroupNorm_1.out12_1.out4_0_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -9841,7 +8498,7 @@ "op_tensor_size": 4194304, "offset": 12615680 }, - "GroupNorm_1.out9_1.out4_1_1": { + "NhwcConv_3_out-/decoder/mid_block/resnets.0/conv2/Conv_output_0.out0_1_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -9855,7 +8512,7 @@ "op_tensor_size": 4194304, "offset": 16809984 }, - "GroupNorm_1_sigmoid_out.9_1.out6_1": { + "/decoder/mid_block/resnets.0/Add.out_2_1_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -9869,35 +8526,33 @@ "op_tensor_size": 4194304, "offset": 21004288 }, - "NhwcConv_3_out-/decoder/mid_block/resnets.0/conv2/Conv_output_0.out0_1_3": { + "/decoder/mid_block/attentions.0/group_norm/Add_output_0_4d_NHWC.out4_1_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, - 64, - 64, + 4096, 512 ], "size_in_bytes": 4194304, "op_tensor_size": 4194304, "offset": 25198592 }, - "/decoder/mid_block/resnets.0/Add.out_2_1_0": { + "/decoder/mid_block/attentions.0/Reshape_2_output_0_3d.out1_3_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, - 64, - 64, + 4096, 512 ], "size_in_bytes": 4194304, "op_tensor_size": 4194304, "offset": 29392896 }, - "/decoder/mid_block/attentions.0/group_norm/Add_output_0_4d_NHWC.out4_0_0": { + "/decoder/mid_block/attentions.0/Reshape_1_output_0_3d.out1_3_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -9910,7 +8565,7 @@ "op_tensor_size": 4194304, "offset": 33587200 }, - "/decoder/mid_block/attentions.0/to_k/Add_output_0.out1_3_0": { + "/decoder/mid_block/attentions.0/Reshape_3_output_0_3d.out1_3_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -9923,7 +8578,7 @@ "op_tensor_size": 4194304, "offset": 37781504 }, - "/decoder/mid_block/attentions.0/to_q/Add_output_0.out1_3_1": { + "/decoder/mid_block/attentions.0/Transpose_5_output_0_3d.out10_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -9936,33 +8591,35 @@ "op_tensor_size": 4194304, "offset": 41975808 }, - "/decoder/mid_block/attentions.0/to_v/Add_output_0.out1_3_2": { + "/decoder/mid_block/attentions.0/Reshape_5_output_0.nhwc6_0.out1_0_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, - 4096, + 64, + 64, 512 ], "size_in_bytes": 4194304, "op_tensor_size": 4194304, "offset": 46170112 }, - "/decoder/mid_block/attentions.0/Reshape_4_output_0.out10_0": { + "/decoder/mid_block/attentions.0/Add_output_0_NHWC.out_2_1_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, - 4096, + 64, + 64, 512 ], "size_in_bytes": 4194304, "op_tensor_size": 4194304, "offset": 50364416 }, - "/decoder/mid_block/attentions.0/Reshape_5_output_0.nhwc3_0.out1_0_0": { + "GroupNorm_3.out12_3.out4_0_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -9976,7 +8633,7 @@ "op_tensor_size": 4194304, "offset": 54558720 }, - "/decoder/mid_block/attentions.0/Add_output_0_NHWC.out_2_1_1": { + "NhwcConv_4_out-/decoder/mid_block/resnets.1/conv1/Conv_output_0.out0_1_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -9990,7 +8647,7 @@ "op_tensor_size": 4194304, "offset": 58753024 }, - "GroupNorm_3.out9_3.out4_1_2": { + "GroupNorm_4.out12_4.out4_0_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10004,7 +8661,7 @@ "op_tensor_size": 4194304, "offset": 62947328 }, - "GroupNorm_3_sigmoid_out.9_3.out6_2": { + "NhwcConv_5_out-/decoder/mid_block/resnets.1/conv2/Conv_output_0.out0_1_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10018,7 +8675,7 @@ "op_tensor_size": 4194304, "offset": 67141632 }, - "NhwcConv_4_out-/decoder/mid_block/resnets.1/conv1/Conv_output_0.out0_1_4": { + "/decoder/mid_block/resnets.1/Add_output_0_NHWC.out_2_1_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10032,7 +8689,7 @@ "op_tensor_size": 4194304, "offset": 71335936 }, - "GroupNorm_4.out9_4.out4_1_3": { + "GroupNorm_5.out12_5.out4_0_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10046,7 +8703,7 @@ "op_tensor_size": 4194304, "offset": 75530240 }, - "GroupNorm_4_sigmoid_out.9_4.out6_3": { + "NhwcConv_6_out-/decoder/up_blocks.0/resnets.0/conv1/Conv_output_0.out0_1_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10060,7 +8717,7 @@ "op_tensor_size": 4194304, "offset": 79724544 }, - "NhwcConv_5_out-/decoder/mid_block/resnets.1/conv2/Conv_output_0.out0_1_5": { + "GroupNorm_6.out12_6.out4_0_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10074,7 +8731,7 @@ "op_tensor_size": 4194304, "offset": 83918848 }, - "/decoder/mid_block/resnets.1/Add_output_0_NHWC.out_2_1_2": { + "NhwcConv_7_out-/decoder/up_blocks.0/resnets.0/conv2/Conv_output_0.out0_1_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10088,7 +8745,7 @@ "op_tensor_size": 4194304, "offset": 88113152 }, - "GroupNorm_5.out9_5.out4_1_4": { + "/decoder/up_blocks.0/resnets.0/Add_output_0_NHWC.out_2_1_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10102,7 +8759,7 @@ "op_tensor_size": 4194304, "offset": 92307456 }, - "GroupNorm_5_sigmoid_out.9_5.out6_4": { + "GroupNorm_7.out12_7.out4_0_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10116,7 +8773,7 @@ "op_tensor_size": 4194304, "offset": 96501760 }, - "NhwcConv_6_out-/decoder/up_blocks.0/resnets.0/conv1/Conv_output_0.out0_1_6": { + "NhwcConv_8_out-/decoder/up_blocks.0/resnets.1/conv1/Conv_output_0.out0_1_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10130,7 +8787,7 @@ "op_tensor_size": 4194304, "offset": 100696064 }, - "GroupNorm_6.out9_6.out4_1_5": { + "GroupNorm_8.out12_8.out4_0_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10144,7 +8801,7 @@ "op_tensor_size": 4194304, "offset": 104890368 }, - "GroupNorm_6_sigmoid_out.9_6.out6_5": { + "NhwcConv_9_out-/decoder/up_blocks.0/resnets.1/conv2/Conv_output_0.out0_1_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10158,7 +8815,7 @@ "op_tensor_size": 4194304, "offset": 109084672 }, - "NhwcConv_7_out-/decoder/up_blocks.0/resnets.0/conv2/Conv_output_0.out0_1_7": { + "/decoder/up_blocks.0/resnets.1/Add_output_0_NHWC.out_2_1_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10172,7 +8829,7 @@ "op_tensor_size": 4194304, "offset": 113278976 }, - "/decoder/up_blocks.0/resnets.0/Add_output_0_NHWC.out_2_1_3": { + "GroupNorm_9.out12_9.out4_0_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10186,7 +8843,7 @@ "op_tensor_size": 4194304, "offset": 117473280 }, - "GroupNorm_7.out9_7.out4_1_6": { + "NhwcConv_10_out-/decoder/up_blocks.0/resnets.2/conv1/Conv_output_0.out0_1_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10200,7 +8857,7 @@ "op_tensor_size": 4194304, "offset": 121667584 }, - "GroupNorm_7_sigmoid_out.9_7.out6_6": { + "GroupNorm_10.out12_10.out4_0_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10214,7 +8871,7 @@ "op_tensor_size": 4194304, "offset": 125861888 }, - "NhwcConv_8_out-/decoder/up_blocks.0/resnets.1/conv1/Conv_output_0.out0_1_8": { + "NhwcConv_11_out-/decoder/up_blocks.0/resnets.2/conv2/Conv_output_0.out0_1_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10228,7 +8885,7 @@ "op_tensor_size": 4194304, "offset": 130056192 }, - "GroupNorm_8.out9_8.out4_1_7": { + "/decoder/up_blocks.0/resnets.2/Add_output_0.nhwc5_0.out_2_1_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10242,147 +8899,133 @@ "op_tensor_size": 4194304, "offset": 134250496 }, - "GroupNorm_8_sigmoid_out.9_8.out6_7": { + "/decoder/up_blocks.0/upsamplers.0/Resize_output_0.nhwc5_0.out_7_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, - 64, - 64, + 128, + 128, 512 ], - "size_in_bytes": 4194304, - "op_tensor_size": 4194304, + "size_in_bytes": 16777216, + "op_tensor_size": 16777216, "offset": 138444800 }, - "NhwcConv_9_out-/decoder/up_blocks.0/resnets.1/conv2/Conv_output_0.out0_1_9": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 64, - 64, - 512 - ], - "size_in_bytes": 4194304, - "op_tensor_size": 4194304, - "offset": 142639104 - }, - "/decoder/up_blocks.0/resnets.1/Add_output_0_NHWC.out_2_1_4": { + "NhwcConv_12_out-/decoder/up_blocks.0/upsamplers.0/conv/Conv_output_0.out0_1_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, - 64, - 64, + 128, + 128, 512 ], - "size_in_bytes": 4194304, - "op_tensor_size": 4194304, - "offset": 146833408 + "size_in_bytes": 16777216, + "op_tensor_size": 16777216, + "offset": 155222016 }, - "GroupNorm_9.out9_9.out4_1_8": { + "GroupNorm_11.out12_11.out4_0_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, - 64, - 64, + 128, + 128, 512 ], - "size_in_bytes": 4194304, - "op_tensor_size": 4194304, - "offset": 151027712 + "size_in_bytes": 16777216, + "op_tensor_size": 16777216, + "offset": 171999232 }, - "GroupNorm_9_sigmoid_out.9_9.out6_8": { + "NhwcConv_13_out-/decoder/up_blocks.1/resnets.0/conv1/Conv_output_0.out0_1_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, - 64, - 64, + 128, + 128, 512 ], - "size_in_bytes": 4194304, - "op_tensor_size": 4194304, - "offset": 155222016 + "size_in_bytes": 16777216, + "op_tensor_size": 16777216, + "offset": 188776448 }, - "NhwcConv_10_out-/decoder/up_blocks.0/resnets.2/conv1/Conv_output_0.out0_1_10": { + "GroupNorm_12.out12_12.out4_0_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, - 64, - 64, + 128, + 128, 512 ], - "size_in_bytes": 4194304, - "op_tensor_size": 4194304, - "offset": 159416320 + "size_in_bytes": 16777216, + "op_tensor_size": 16777216, + "offset": 205553664 }, - "GroupNorm_10.out9_10.out4_1_9": { + "NhwcConv_14_out-/decoder/up_blocks.1/resnets.0/conv2/Conv_output_0.out0_1_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, - 64, - 64, + 128, + 128, 512 ], - "size_in_bytes": 4194304, - "op_tensor_size": 4194304, - "offset": 163610624 + "size_in_bytes": 16777216, + "op_tensor_size": 16777216, + "offset": 222330880 }, - "GroupNorm_10_sigmoid_out.9_10.out6_9": { + "/decoder/up_blocks.1/resnets.0/Add.out_2_1_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, - 64, - 64, + 128, + 128, 512 ], - "size_in_bytes": 4194304, - "op_tensor_size": 4194304, - "offset": 167804928 + "size_in_bytes": 16777216, + "op_tensor_size": 16777216, + "offset": 239108096 }, - "NhwcConv_11_out-/decoder/up_blocks.0/resnets.2/conv2/Conv_output_0.out0_1_11": { + "GroupNorm_13.out12_13.out4_0_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, - 64, - 64, + 128, + 128, 512 ], - "size_in_bytes": 4194304, - "op_tensor_size": 4194304, - "offset": 171999232 + "size_in_bytes": 16777216, + "op_tensor_size": 16777216, + "offset": 255885312 }, - "/decoder/up_blocks.0/resnets.2/Add_output_0.nhwc2_0.out_2_1_5": { + "NhwcConv_15_out-/decoder/up_blocks.1/resnets.1/conv1/Conv_output_0.out0_1_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, - 64, - 64, + 128, + 128, 512 ], - "size_in_bytes": 4194304, - "op_tensor_size": 4194304, - "offset": 176193536 + "size_in_bytes": 16777216, + "op_tensor_size": 16777216, + "offset": 272662528 }, - "/decoder/up_blocks.0/upsamplers.0/Resize_output_0.nhwc2_0.out_7_0": { + "GroupNorm_14.out12_14.out4_0_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10394,9 +9037,9 @@ ], "size_in_bytes": 16777216, "op_tensor_size": 16777216, - "offset": 180387840 + "offset": 289439744 }, - "NhwcConv_12_out-/decoder/up_blocks.0/upsamplers.0/conv/Conv_output_0.out0_1_12": { + "NhwcConv_16_out-/decoder/up_blocks.1/resnets.1/conv2/Conv_output_0.out0_1_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10408,9 +9051,9 @@ ], "size_in_bytes": 16777216, "op_tensor_size": 16777216, - "offset": 197165056 + "offset": 306216960 }, - "GroupNorm_11.out9_11.out4_1_10": { + "/decoder/up_blocks.1/resnets.1/Add.out_2_1_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10422,9 +9065,9 @@ ], "size_in_bytes": 16777216, "op_tensor_size": 16777216, - "offset": 213942272 + "offset": 322994176 }, - "GroupNorm_11_sigmoid_out.9_11.out6_10": { + "GroupNorm_15.out12_15.out4_0_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10436,9 +9079,9 @@ ], "size_in_bytes": 16777216, "op_tensor_size": 16777216, - "offset": 230719488 + "offset": 339771392 }, - "NhwcConv_13_out-/decoder/up_blocks.1/resnets.0/conv1/Conv_output_0.out0_1_13": { + "NhwcConv_17_out-/decoder/up_blocks.1/resnets.2/conv1/Conv_output_0.out0_1_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10450,9 +9093,9 @@ ], "size_in_bytes": 16777216, "op_tensor_size": 16777216, - "offset": 247496704 + "offset": 356548608 }, - "GroupNorm_12.out9_12.out4_1_11": { + "GroupNorm_16.out12_16.out4_0_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10464,9 +9107,9 @@ ], "size_in_bytes": 16777216, "op_tensor_size": 16777216, - "offset": 264273920 + "offset": 373325824 }, - "GroupNorm_12_sigmoid_out.9_12.out6_11": { + "NhwcConv_18_out-/decoder/up_blocks.1/resnets.2/conv2/Conv_output_0.out0_1_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10478,9 +9121,9 @@ ], "size_in_bytes": 16777216, "op_tensor_size": 16777216, - "offset": 281051136 + "offset": 390103040 }, - "NhwcConv_14_out-/decoder/up_blocks.1/resnets.0/conv2/Conv_output_0.out0_1_14": { + "/decoder/up_blocks.1/resnets.2/Add.out_2_1_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10492,219 +9135,9 @@ ], "size_in_bytes": 16777216, "op_tensor_size": 16777216, - "offset": 297828352 + "offset": 406880256 }, - "/decoder/up_blocks.1/resnets.0/Add.out_2_1_6": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 128, - 128, - 512 - ], - "size_in_bytes": 16777216, - "op_tensor_size": 16777216, - "offset": 314605568 - }, - "GroupNorm_13.out9_13.out4_1_12": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 128, - 128, - 512 - ], - "size_in_bytes": 16777216, - "op_tensor_size": 16777216, - "offset": 331382784 - }, - "GroupNorm_13_sigmoid_out.9_13.out6_12": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 128, - 128, - 512 - ], - "size_in_bytes": 16777216, - "op_tensor_size": 16777216, - "offset": 348160000 - }, - "NhwcConv_15_out-/decoder/up_blocks.1/resnets.1/conv1/Conv_output_0.out0_1_15": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 128, - 128, - 512 - ], - "size_in_bytes": 16777216, - "op_tensor_size": 16777216, - "offset": 364937216 - }, - "GroupNorm_14.out9_14.out4_1_13": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 128, - 128, - 512 - ], - "size_in_bytes": 16777216, - "op_tensor_size": 16777216, - "offset": 381714432 - }, - "GroupNorm_14_sigmoid_out.9_14.out6_13": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 128, - 128, - 512 - ], - "size_in_bytes": 16777216, - "op_tensor_size": 16777216, - "offset": 398491648 - }, - "NhwcConv_16_out-/decoder/up_blocks.1/resnets.1/conv2/Conv_output_0.out0_1_16": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 128, - 128, - 512 - ], - "size_in_bytes": 16777216, - "op_tensor_size": 16777216, - "offset": 415268864 - }, - "/decoder/up_blocks.1/resnets.1/Add.out_2_1_7": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 128, - 128, - 512 - ], - "size_in_bytes": 16777216, - "op_tensor_size": 16777216, - "offset": 432046080 - }, - "GroupNorm_15.out9_15.out4_1_14": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 128, - 128, - 512 - ], - "size_in_bytes": 16777216, - "op_tensor_size": 16777216, - "offset": 448823296 - }, - "GroupNorm_15_sigmoid_out.9_15.out6_14": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 128, - 128, - 512 - ], - "size_in_bytes": 16777216, - "op_tensor_size": 16777216, - "offset": 465600512 - }, - "NhwcConv_17_out-/decoder/up_blocks.1/resnets.2/conv1/Conv_output_0.out0_1_17": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 128, - 128, - 512 - ], - "size_in_bytes": 16777216, - "op_tensor_size": 16777216, - "offset": 482377728 - }, - "GroupNorm_16.out9_16.out4_1_15": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 128, - 128, - 512 - ], - "size_in_bytes": 16777216, - "op_tensor_size": 16777216, - "offset": 499154944 - }, - "GroupNorm_16_sigmoid_out.9_16.out6_15": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 128, - 128, - 512 - ], - "size_in_bytes": 16777216, - "op_tensor_size": 16777216, - "offset": 515932160 - }, - "NhwcConv_18_out-/decoder/up_blocks.1/resnets.2/conv2/Conv_output_0.out0_1_18": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 128, - 128, - 512 - ], - "size_in_bytes": 16777216, - "op_tensor_size": 16777216, - "offset": 532709376 - }, - "/decoder/up_blocks.1/resnets.2/Add.out_2_1_8": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 128, - 128, - 512 - ], - "size_in_bytes": 16777216, - "op_tensor_size": 16777216, - "offset": 549486592 - }, - "/decoder/up_blocks.1/upsamplers.0/Resize_output_0.nhwc2_1.out_7_1": { + "/decoder/up_blocks.1/upsamplers.0/Resize_output_0.nhwc5_1.out_7_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10716,7 +9149,7 @@ ], "size_in_bytes": 67108864, "op_tensor_size": 67108864, - "offset": 566263808 + "offset": 423657472 }, "NhwcConv_19_out-/decoder/up_blocks.1/upsamplers.0/conv/Conv_output_0.out0_1_19": { "packed_buffer_label": "scratch", @@ -10730,9 +9163,9 @@ ], "size_in_bytes": 67108864, "op_tensor_size": 67108864, - "offset": 633372672 + "offset": 490766336 }, - "GroupNorm_17.out9_17.out4_1_16": { + "GroupNorm_17.out12_17.out4_0_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10744,7 +9177,7 @@ ], "size_in_bytes": 67108864, "op_tensor_size": 67108864, - "offset": 700481536 + "offset": 557875200 }, "NhwcConv_20_out-/decoder/up_blocks.2/resnets.0/conv_shortcut/Conv_output_0.out0_1_20": { "packed_buffer_label": "scratch", @@ -10758,21 +9191,7 @@ ], "size_in_bytes": 33554432, "op_tensor_size": 33554432, - "offset": 767590400 - }, - "GroupNorm_17_sigmoid_out.9_17.out6_16": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 256, - 256, - 512 - ], - "size_in_bytes": 67108864, - "op_tensor_size": 67108864, - "offset": 801144832 + "offset": 624984064 }, "NhwcConv_21_out-/decoder/up_blocks.2/resnets.0/conv1/Conv_output_0.out0_1_21": { "packed_buffer_label": "scratch", @@ -10786,23 +9205,9 @@ ], "size_in_bytes": 33554432, "op_tensor_size": 33554432, - "offset": 868253696 - }, - "GroupNorm_18.out9_18.out4_1_17": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 256, - 256, - 256 - ], - "size_in_bytes": 33554432, - "op_tensor_size": 33554432, - "offset": 901808128 + "offset": 658538496 }, - "GroupNorm_18_sigmoid_out.9_18.out6_17": { + "GroupNorm_18.out12_18.out4_0_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10814,7 +9219,7 @@ ], "size_in_bytes": 33554432, "op_tensor_size": 33554432, - "offset": 935362560 + "offset": 692092928 }, "NhwcConv_22_out-/decoder/up_blocks.2/resnets.0/conv2/Conv_output_0.out0_1_22": { "packed_buffer_label": "scratch", @@ -10828,7 +9233,7 @@ ], "size_in_bytes": 33554432, "op_tensor_size": 33554432, - "offset": 968916992 + "offset": 725647360 }, "/decoder/up_blocks.2/resnets.0/Add.out_2_1_9": { "packed_buffer_label": "scratch", @@ -10842,23 +9247,9 @@ ], "size_in_bytes": 33554432, "op_tensor_size": 33554432, - "offset": 1002471424 - }, - "GroupNorm_19.out9_19.out4_1_18": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 256, - 256, - 256 - ], - "size_in_bytes": 33554432, - "op_tensor_size": 33554432, - "offset": 1036025856 + "offset": 759201792 }, - "GroupNorm_19_sigmoid_out.9_19.out6_18": { + "GroupNorm_19.out12_19.out4_0_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10870,7 +9261,7 @@ ], "size_in_bytes": 33554432, "op_tensor_size": 33554432, - "offset": 1069580288 + "offset": 792756224 }, "NhwcConv_23_out-/decoder/up_blocks.2/resnets.1/conv1/Conv_output_0.out0_1_23": { "packed_buffer_label": "scratch", @@ -10884,9 +9275,9 @@ ], "size_in_bytes": 33554432, "op_tensor_size": 33554432, - "offset": 1103134720 + "offset": 826310656 }, - "GroupNorm_20.out9_20.out4_1_19": { + "GroupNorm_20.out12_20.out4_0_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10898,21 +9289,7 @@ ], "size_in_bytes": 33554432, "op_tensor_size": 33554432, - "offset": 1136689152 - }, - "GroupNorm_20_sigmoid_out.9_20.out6_19": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 256, - 256, - 256 - ], - "size_in_bytes": 33554432, - "op_tensor_size": 33554432, - "offset": 1170243584 + "offset": 859865088 }, "NhwcConv_24_out-/decoder/up_blocks.2/resnets.1/conv2/Conv_output_0.out0_1_24": { "packed_buffer_label": "scratch", @@ -10926,7 +9303,7 @@ ], "size_in_bytes": 33554432, "op_tensor_size": 33554432, - "offset": 1203798016 + "offset": 893419520 }, "/decoder/up_blocks.2/resnets.1/Add.out_2_1_10": { "packed_buffer_label": "scratch", @@ -10940,9 +9317,9 @@ ], "size_in_bytes": 33554432, "op_tensor_size": 33554432, - "offset": 1237352448 + "offset": 926973952 }, - "GroupNorm_21.out9_21.out4_1_20": { + "GroupNorm_21.out12_21.out4_0_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10954,21 +9331,7 @@ ], "size_in_bytes": 33554432, "op_tensor_size": 33554432, - "offset": 1270906880 - }, - "GroupNorm_21_sigmoid_out.9_21.out6_20": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 256, - 256, - 256 - ], - "size_in_bytes": 33554432, - "op_tensor_size": 33554432, - "offset": 1304461312 + "offset": 960528384 }, "NhwcConv_25_out-/decoder/up_blocks.2/resnets.2/conv1/Conv_output_0.out0_1_25": { "packed_buffer_label": "scratch", @@ -10982,9 +9345,9 @@ ], "size_in_bytes": 33554432, "op_tensor_size": 33554432, - "offset": 1338015744 + "offset": 994082816 }, - "GroupNorm_22.out9_22.out4_1_21": { + "GroupNorm_22.out12_22.out4_0_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -10996,21 +9359,7 @@ ], "size_in_bytes": 33554432, "op_tensor_size": 33554432, - "offset": 1371570176 - }, - "GroupNorm_22_sigmoid_out.9_22.out6_21": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 256, - 256, - 256 - ], - "size_in_bytes": 33554432, - "op_tensor_size": 33554432, - "offset": 1405124608 + "offset": 1027637248 }, "NhwcConv_26_out-/decoder/up_blocks.2/resnets.2/conv2/Conv_output_0.out0_1_26": { "packed_buffer_label": "scratch", @@ -11024,7 +9373,7 @@ ], "size_in_bytes": 33554432, "op_tensor_size": 33554432, - "offset": 1438679040 + "offset": 1061191680 }, "/decoder/up_blocks.2/resnets.2/Add.out_2_1_11": { "packed_buffer_label": "scratch", @@ -11038,9 +9387,9 @@ ], "size_in_bytes": 33554432, "op_tensor_size": 33554432, - "offset": 1472233472 + "offset": 1094746112 }, - "/decoder/up_blocks.2/upsamplers.0/Resize_output_0.nhwc2_2.out_7_2": { + "/decoder/up_blocks.2/upsamplers.0/Resize_output_0.nhwc5_2.out_7_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -11052,7 +9401,7 @@ ], "size_in_bytes": 134217728, "op_tensor_size": 134217728, - "offset": 1505787904 + "offset": 1128300544 }, "NhwcConv_27_out-/decoder/up_blocks.2/upsamplers.0/conv/Conv_output_0.out0_1_27": { "packed_buffer_label": "scratch", @@ -11066,9 +9415,9 @@ ], "size_in_bytes": 134217728, "op_tensor_size": 134217728, - "offset": 1640005632 + "offset": 1262518272 }, - "GroupNorm_23.out9_23.out4_1_22": { + "GroupNorm_23.out12_23.out4_0_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -11080,7 +9429,7 @@ ], "size_in_bytes": 134217728, "op_tensor_size": 134217728, - "offset": 1774223360 + "offset": 1396736000 }, "NhwcConv_28_out-/decoder/up_blocks.3/resnets.0/conv_shortcut/Conv_output_0.out0_1_28": { "packed_buffer_label": "scratch", @@ -11094,21 +9443,7 @@ ], "size_in_bytes": 67108864, "op_tensor_size": 67108864, - "offset": 1908441088 - }, - "GroupNorm_23_sigmoid_out.9_23.out6_22": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 512, - 512, - 256 - ], - "size_in_bytes": 134217728, - "op_tensor_size": 134217728, - "offset": 1975549952 + "offset": 1530953728 }, "NhwcConv_29_out-/decoder/up_blocks.3/resnets.0/conv1/Conv_output_0.out0_1_29": { "packed_buffer_label": "scratch", @@ -11122,23 +9457,9 @@ ], "size_in_bytes": 67108864, "op_tensor_size": 67108864, - "offset": 2109767680 - }, - "GroupNorm_24.out9_24.out4_1_23": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 512, - 512, - 128 - ], - "size_in_bytes": 67108864, - "op_tensor_size": 67108864, - "offset": 2176876544 + "offset": 1598062592 }, - "GroupNorm_24_sigmoid_out.9_24.out6_23": { + "GroupNorm_24.out12_24.out4_0_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -11150,7 +9471,7 @@ ], "size_in_bytes": 67108864, "op_tensor_size": 67108864, - "offset": 2243985408 + "offset": 1665171456 }, "NhwcConv_30_out-/decoder/up_blocks.3/resnets.0/conv2/Conv_output_0.out0_1_30": { "packed_buffer_label": "scratch", @@ -11164,7 +9485,7 @@ ], "size_in_bytes": 67108864, "op_tensor_size": 67108864, - "offset": 2311094272 + "offset": 1732280320 }, "/decoder/up_blocks.3/resnets.0/Add.out_2_1_12": { "packed_buffer_label": "scratch", @@ -11178,9 +9499,9 @@ ], "size_in_bytes": 67108864, "op_tensor_size": 67108864, - "offset": 2378203136 + "offset": 1799389184 }, - "GroupNorm_25.out9_25.out4_1_24": { + "GroupNorm_25.out12_25.out4_0_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -11192,21 +9513,7 @@ ], "size_in_bytes": 67108864, "op_tensor_size": 67108864, - "offset": 2445312000 - }, - "GroupNorm_25_sigmoid_out.9_25.out6_24": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 512, - 512, - 128 - ], - "size_in_bytes": 67108864, - "op_tensor_size": 67108864, - "offset": 2512420864 + "offset": 1866498048 }, "NhwcConv_31_out-/decoder/up_blocks.3/resnets.1/conv1/Conv_output_0.out0_1_31": { "packed_buffer_label": "scratch", @@ -11220,9 +9527,9 @@ ], "size_in_bytes": 67108864, "op_tensor_size": 67108864, - "offset": 2579529728 + "offset": 1933606912 }, - "GroupNorm_26.out9_26.out4_1_25": { + "GroupNorm_26.out12_26.out4_0_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -11234,21 +9541,7 @@ ], "size_in_bytes": 67108864, "op_tensor_size": 67108864, - "offset": 2646638592 - }, - "GroupNorm_26_sigmoid_out.9_26.out6_25": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 512, - 512, - 128 - ], - "size_in_bytes": 67108864, - "op_tensor_size": 67108864, - "offset": 2713747456 + "offset": 2000715776 }, "NhwcConv_32_out-/decoder/up_blocks.3/resnets.1/conv2/Conv_output_0.out0_1_32": { "packed_buffer_label": "scratch", @@ -11262,7 +9555,7 @@ ], "size_in_bytes": 67108864, "op_tensor_size": 67108864, - "offset": 2780856320 + "offset": 2067824640 }, "/decoder/up_blocks.3/resnets.1/Add.out_2_1_13": { "packed_buffer_label": "scratch", @@ -11276,9 +9569,9 @@ ], "size_in_bytes": 67108864, "op_tensor_size": 67108864, - "offset": 2847965184 + "offset": 2134933504 }, - "GroupNorm_27.out9_27.out4_1_26": { + "GroupNorm_27.out12_27.out4_0_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -11290,21 +9583,7 @@ ], "size_in_bytes": 67108864, "op_tensor_size": 67108864, - "offset": 2915074048 - }, - "GroupNorm_27_sigmoid_out.9_27.out6_26": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 512, - 512, - 128 - ], - "size_in_bytes": 67108864, - "op_tensor_size": 67108864, - "offset": 2982182912 + "offset": 2202042368 }, "NhwcConv_33_out-/decoder/up_blocks.3/resnets.2/conv1/Conv_output_0.out0_1_33": { "packed_buffer_label": "scratch", @@ -11318,23 +9597,9 @@ ], "size_in_bytes": 67108864, "op_tensor_size": 67108864, - "offset": 3049291776 - }, - "GroupNorm_28.out9_28.out4_1_27": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 512, - 512, - 128 - ], - "size_in_bytes": 67108864, - "op_tensor_size": 67108864, - "offset": 3116400640 + "offset": 2269151232 }, - "GroupNorm_28_sigmoid_out.9_28.out6_27": { + "GroupNorm_28.out12_28.out4_0_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -11346,7 +9611,7 @@ ], "size_in_bytes": 67108864, "op_tensor_size": 67108864, - "offset": 3183509504 + "offset": 2336260096 }, "NhwcConv_34_out-/decoder/up_blocks.3/resnets.2/conv2/Conv_output_0.out0_1_34": { "packed_buffer_label": "scratch", @@ -11360,7 +9625,7 @@ ], "size_in_bytes": 67108864, "op_tensor_size": 67108864, - "offset": 3250618368 + "offset": 2403368960 }, "/decoder/up_blocks.3/resnets.2/Add.out_2_1_14": { "packed_buffer_label": "scratch", @@ -11374,9 +9639,9 @@ ], "size_in_bytes": 67108864, "op_tensor_size": 67108864, - "offset": 3317727232 + "offset": 2470477824 }, - "GroupNorm_29.out9_29.out4_1_28": { + "GroupNorm_29.out12_29.out4_0_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", @@ -11388,21 +9653,7 @@ ], "size_in_bytes": 67108864, "op_tensor_size": 67108864, - "offset": 3384836096 - }, - "GroupNorm_29_sigmoid_out.9_29.out6_28": { - "packed_buffer_label": "scratch", - "xrt_arg_id": 2, - "dtype": "bfloat16", - "shape": [ - 1, - 512, - 512, - 128 - ], - "size_in_bytes": 67108864, - "op_tensor_size": 67108864, - "offset": 3451944960 + "offset": 2537586688 }, "NhwcConv_0_weight_NHWC": { "packed_buffer_label": "const", @@ -11414,7 +9665,7 @@ "size_in_bytes": 400, "op_tensor_size": 400, "offset": 0, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_0.const", + "file_name": ".cache/NhwcConv_0-post_quant_convConv_0.const", "file_size": 400 }, "NhwcConv_1_weight_NHWC": { @@ -11427,10 +9678,10 @@ "size_in_bytes": 49664, "op_tensor_size": 49664, "offset": 400, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_1.const", + "file_name": ".cache/NhwcConv_0-post_quant_convConv_1.const", "file_size": 49664 }, - "GroupNorm_0_wts_4_1_0": { + "GroupNorm_0_wts_4_0_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -11440,36 +9691,23 @@ "size_in_bytes": 2048, "op_tensor_size": 2048, "offset": 50064, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_2.const", + "file_name": ".cache/NhwcConv_0-post_quant_convConv_2.const", "file_size": 2048 }, - "Sigmoid_0.weights6_0": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 52112, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_3.const", - "file_size": 256 - }, "NhwcConv_2_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 2785280 + 2916352 ], - "size_in_bytes": 2785280, - "op_tensor_size": 2785280, - "offset": 52368, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_4.const", - "file_size": 2785280 + "size_in_bytes": 2916352, + "op_tensor_size": 2916352, + "offset": 52112, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_3.const", + "file_size": 2916352 }, - "GroupNorm_1_wts_4_1_1": { + "GroupNorm_1_wts_4_0_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -11478,37 +9716,24 @@ ], "size_in_bytes": 2048, "op_tensor_size": 2048, - "offset": 2837648, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_5.const", + "offset": 2968464, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_4.const", "file_size": 2048 }, - "Sigmoid_1.weights6_1": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 2839696, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_6.const", - "file_size": 256 - }, "NhwcConv_3_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 2785280 + 2916352 ], - "size_in_bytes": 2785280, - "op_tensor_size": 2785280, - "offset": 2839952, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_7.const", - "file_size": 2785280 + "size_in_bytes": 2916352, + "op_tensor_size": 2916352, + "offset": 2970512, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_5.const", + "file_size": 2916352 }, - "GroupNorm_2_wts_4_0_0": { + "GroupNorm_2_wts_4_1_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -11517,11 +9742,11 @@ ], "size_in_bytes": 2048, "op_tensor_size": 2048, - "offset": 5625232, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_8.const", + "offset": 5886864, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_6.const", "file_size": 2048 }, - "onnx::MatMul_918": { + "onnx::MatMul_918_2_0_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", @@ -11530,11 +9755,11 @@ ], "size_in_bytes": 299008, "op_tensor_size": 299008, - "offset": 5627280, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_9.const", + "offset": 5888912, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_7.const", "file_size": 299008 }, - "onnx::MatMul_917": { + "onnx::MatMul_917_2_0_1": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", @@ -11543,11 +9768,11 @@ ], "size_in_bytes": 299008, "op_tensor_size": 299008, - "offset": 5926288, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_10.const", + "offset": 6187920, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_8.const", "file_size": 299008 }, - "onnx::MatMul_919": { + "onnx::MatMul_919_2_0_2": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", @@ -11556,11 +9781,11 @@ ], "size_in_bytes": 299008, "op_tensor_size": 299008, - "offset": 6225296, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_11.const", + "offset": 6486928, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_9.const", "file_size": 299008 }, - "/decoder/mid_block/attentions.0/MatMulmha_1_0_mask.10_0": { + "/decoder/mid_block/attentions.0/MatMul/MatMulScaleFusion/_FusedActivationmha_3_1_0_mask.10_0": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -11569,11 +9794,11 @@ ], "size_in_bytes": 8192, "op_tensor_size": 8192, - "offset": 6524304, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_12.const", + "offset": 6785936, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_10.const", "file_size": 8192 }, - "onnx::MatMul_927": { + "onnx::MatMul_927_2_0_3": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", @@ -11582,50 +9807,11 @@ ], "size_in_bytes": 299008, "op_tensor_size": 299008, - "offset": 6532496, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_13.const", + "offset": 6794128, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_11.const", "file_size": 299008 }, - "GroupNorm_3_wts_4_1_2": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 1024 - ], - "size_in_bytes": 2048, - "op_tensor_size": 2048, - "offset": 6831504, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_14.const", - "file_size": 2048 - }, - "Sigmoid_2.weights6_2": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 6833552, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_15.const", - "file_size": 256 - }, - "NhwcConv_4_weight_NHWC": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfp16ebs8", - "shape": [ - 2785280 - ], - "size_in_bytes": 2785280, - "op_tensor_size": 2785280, - "offset": 6833808, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_16.const", - "file_size": 2785280 - }, - "GroupNorm_4_wts_4_1_3": { + "GroupNorm_3_wts_4_0_2": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -11634,37 +9820,24 @@ ], "size_in_bytes": 2048, "op_tensor_size": 2048, - "offset": 9619088, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_17.const", + "offset": 7093136, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_12.const", "file_size": 2048 }, - "Sigmoid_3.weights6_3": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 9621136, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_18.const", - "file_size": 256 - }, - "NhwcConv_5_weight_NHWC": { + "NhwcConv_4_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 2785280 + 2916352 ], - "size_in_bytes": 2785280, - "op_tensor_size": 2785280, - "offset": 9621392, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_19.const", - "file_size": 2785280 + "size_in_bytes": 2916352, + "op_tensor_size": 2916352, + "offset": 7095184, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_13.const", + "file_size": 2916352 }, - "GroupNorm_5_wts_4_1_4": { + "GroupNorm_4_wts_4_0_3": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -11673,37 +9846,50 @@ ], "size_in_bytes": 2048, "op_tensor_size": 2048, - "offset": 12406672, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_20.const", + "offset": 10011536, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_14.const", "file_size": 2048 }, - "Sigmoid_4.weights6_4": { + "NhwcConv_5_weight_NHWC": { + "packed_buffer_label": "const", + "xrt_arg_id": 3, + "dtype": "bfp16ebs8", + "shape": [ + 2916352 + ], + "size_in_bytes": 2916352, + "op_tensor_size": 2916352, + "offset": 10013584, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_15.const", + "file_size": 2916352 + }, + "GroupNorm_5_wts_4_0_4": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ - 128 + 1024 ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 12408720, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_21.const", - "file_size": 256 + "size_in_bytes": 2048, + "op_tensor_size": 2048, + "offset": 12929936, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_16.const", + "file_size": 2048 }, "NhwcConv_6_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 2785280 + 2916352 ], - "size_in_bytes": 2785280, - "op_tensor_size": 2785280, - "offset": 12408976, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_22.const", - "file_size": 2785280 + "size_in_bytes": 2916352, + "op_tensor_size": 2916352, + "offset": 12931984, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_17.const", + "file_size": 2916352 }, - "GroupNorm_6_wts_4_1_5": { + "GroupNorm_6_wts_4_0_5": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -11712,37 +9898,24 @@ ], "size_in_bytes": 2048, "op_tensor_size": 2048, - "offset": 15194256, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_23.const", + "offset": 15848336, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_18.const", "file_size": 2048 }, - "Sigmoid_5.weights6_5": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 15196304, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_24.const", - "file_size": 256 - }, "NhwcConv_7_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 2785280 + 2916352 ], - "size_in_bytes": 2785280, - "op_tensor_size": 2785280, - "offset": 15196560, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_25.const", - "file_size": 2785280 + "size_in_bytes": 2916352, + "op_tensor_size": 2916352, + "offset": 15850384, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_19.const", + "file_size": 2916352 }, - "GroupNorm_7_wts_4_1_6": { + "GroupNorm_7_wts_4_0_6": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -11751,37 +9924,24 @@ ], "size_in_bytes": 2048, "op_tensor_size": 2048, - "offset": 17981840, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_26.const", + "offset": 18766736, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_20.const", "file_size": 2048 }, - "Sigmoid_6.weights6_6": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 17983888, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_27.const", - "file_size": 256 - }, "NhwcConv_8_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 2785280 + 2916352 ], - "size_in_bytes": 2785280, - "op_tensor_size": 2785280, - "offset": 17984144, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_28.const", - "file_size": 2785280 + "size_in_bytes": 2916352, + "op_tensor_size": 2916352, + "offset": 18768784, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_21.const", + "file_size": 2916352 }, - "GroupNorm_8_wts_4_1_7": { + "GroupNorm_8_wts_4_0_7": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -11790,37 +9950,24 @@ ], "size_in_bytes": 2048, "op_tensor_size": 2048, - "offset": 20769424, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_29.const", + "offset": 21685136, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_22.const", "file_size": 2048 }, - "Sigmoid_7.weights6_7": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 20771472, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_30.const", - "file_size": 256 - }, "NhwcConv_9_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 2785280 + 2916352 ], - "size_in_bytes": 2785280, - "op_tensor_size": 2785280, - "offset": 20771728, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_31.const", - "file_size": 2785280 + "size_in_bytes": 2916352, + "op_tensor_size": 2916352, + "offset": 21687184, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_23.const", + "file_size": 2916352 }, - "GroupNorm_9_wts_4_1_8": { + "GroupNorm_9_wts_4_0_8": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -11829,37 +9976,24 @@ ], "size_in_bytes": 2048, "op_tensor_size": 2048, - "offset": 23557008, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_32.const", + "offset": 24603536, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_24.const", "file_size": 2048 }, - "Sigmoid_8.weights6_8": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 23559056, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_33.const", - "file_size": 256 - }, "NhwcConv_10_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 2785280 + 2916352 ], - "size_in_bytes": 2785280, - "op_tensor_size": 2785280, - "offset": 23559312, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_34.const", - "file_size": 2785280 + "size_in_bytes": 2916352, + "op_tensor_size": 2916352, + "offset": 24605584, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_25.const", + "file_size": 2916352 }, - "GroupNorm_10_wts_4_1_9": { + "GroupNorm_10_wts_4_0_9": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -11868,35 +10002,22 @@ ], "size_in_bytes": 2048, "op_tensor_size": 2048, - "offset": 26344592, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_35.const", + "offset": 27521936, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_26.const", "file_size": 2048 }, - "Sigmoid_9.weights6_9": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 26346640, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_36.const", - "file_size": 256 - }, "NhwcConv_11_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 2785280 + 2916352 ], - "size_in_bytes": 2785280, - "op_tensor_size": 2785280, - "offset": 26346896, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_37.const", - "file_size": 2785280 + "size_in_bytes": 2916352, + "op_tensor_size": 2916352, + "offset": 27523984, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_27.const", + "file_size": 2916352 }, "/decoder/up_blocks.0/upsamplers.0/Resize.weights7_0": { "packed_buffer_label": "const", @@ -11907,8 +10028,8 @@ ], "size_in_bytes": 256, "op_tensor_size": 256, - "offset": 29132176, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_38.const", + "offset": 30440336, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_28.const", "file_size": 256 }, "NhwcConv_12_weight_NHWC": { @@ -11916,15 +10037,15 @@ "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 2785280 + 2916352 ], - "size_in_bytes": 2785280, - "op_tensor_size": 2785280, - "offset": 29132432, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_39.const", - "file_size": 2785280 + "size_in_bytes": 2916352, + "op_tensor_size": 2916352, + "offset": 30440592, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_29.const", + "file_size": 2916352 }, - "GroupNorm_11_wts_4_1_10": { + "GroupNorm_11_wts_4_0_10": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -11933,37 +10054,24 @@ ], "size_in_bytes": 2048, "op_tensor_size": 2048, - "offset": 31917712, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_40.const", + "offset": 33356944, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_30.const", "file_size": 2048 }, - "Sigmoid_10.weights6_10": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 31919760, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_41.const", - "file_size": 256 - }, "NhwcConv_13_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 2785280 + 2916352 ], - "size_in_bytes": 2785280, - "op_tensor_size": 2785280, - "offset": 31920016, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_42.const", - "file_size": 2785280 + "size_in_bytes": 2916352, + "op_tensor_size": 2916352, + "offset": 33358992, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_31.const", + "file_size": 2916352 }, - "GroupNorm_12_wts_4_1_11": { + "GroupNorm_12_wts_4_0_11": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -11972,37 +10080,24 @@ ], "size_in_bytes": 2048, "op_tensor_size": 2048, - "offset": 34705296, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_43.const", + "offset": 36275344, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_32.const", "file_size": 2048 }, - "Sigmoid_11.weights6_11": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 34707344, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_44.const", - "file_size": 256 - }, "NhwcConv_14_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 2785280 + 2916352 ], - "size_in_bytes": 2785280, - "op_tensor_size": 2785280, - "offset": 34707600, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_45.const", - "file_size": 2785280 + "size_in_bytes": 2916352, + "op_tensor_size": 2916352, + "offset": 36277392, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_33.const", + "file_size": 2916352 }, - "GroupNorm_13_wts_4_1_12": { + "GroupNorm_13_wts_4_0_12": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -12011,37 +10106,24 @@ ], "size_in_bytes": 2048, "op_tensor_size": 2048, - "offset": 37492880, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_46.const", + "offset": 39193744, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_34.const", "file_size": 2048 }, - "Sigmoid_12.weights6_12": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 37494928, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_47.const", - "file_size": 256 - }, "NhwcConv_15_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 2785280 + 2916352 ], - "size_in_bytes": 2785280, - "op_tensor_size": 2785280, - "offset": 37495184, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_48.const", - "file_size": 2785280 + "size_in_bytes": 2916352, + "op_tensor_size": 2916352, + "offset": 39195792, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_35.const", + "file_size": 2916352 }, - "GroupNorm_14_wts_4_1_13": { + "GroupNorm_14_wts_4_0_13": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -12050,37 +10132,24 @@ ], "size_in_bytes": 2048, "op_tensor_size": 2048, - "offset": 40280464, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_49.const", + "offset": 42112144, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_36.const", "file_size": 2048 }, - "Sigmoid_13.weights6_13": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 40282512, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_50.const", - "file_size": 256 - }, "NhwcConv_16_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 2785280 + 2916352 ], - "size_in_bytes": 2785280, - "op_tensor_size": 2785280, - "offset": 40282768, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_51.const", - "file_size": 2785280 + "size_in_bytes": 2916352, + "op_tensor_size": 2916352, + "offset": 42114192, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_37.const", + "file_size": 2916352 }, - "GroupNorm_15_wts_4_1_14": { + "GroupNorm_15_wts_4_0_14": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -12089,37 +10158,24 @@ ], "size_in_bytes": 2048, "op_tensor_size": 2048, - "offset": 43068048, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_52.const", + "offset": 45030544, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_38.const", "file_size": 2048 }, - "Sigmoid_14.weights6_14": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 43070096, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_53.const", - "file_size": 256 - }, "NhwcConv_17_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 2785280 + 2916352 ], - "size_in_bytes": 2785280, - "op_tensor_size": 2785280, - "offset": 43070352, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_54.const", - "file_size": 2785280 + "size_in_bytes": 2916352, + "op_tensor_size": 2916352, + "offset": 45032592, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_39.const", + "file_size": 2916352 }, - "GroupNorm_16_wts_4_1_15": { + "GroupNorm_16_wts_4_0_15": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -12128,35 +10184,22 @@ ], "size_in_bytes": 2048, "op_tensor_size": 2048, - "offset": 45855632, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_55.const", + "offset": 47948944, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_40.const", "file_size": 2048 }, - "Sigmoid_15.weights6_15": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 45857680, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_56.const", - "file_size": 256 - }, "NhwcConv_18_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 2785280 + 2916352 ], - "size_in_bytes": 2785280, - "op_tensor_size": 2785280, - "offset": 45857936, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_57.const", - "file_size": 2785280 + "size_in_bytes": 2916352, + "op_tensor_size": 2916352, + "offset": 47950992, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_41.const", + "file_size": 2916352 }, "/decoder/up_blocks.1/upsamplers.0/Resize.weights7_1": { "packed_buffer_label": "const", @@ -12167,8 +10210,8 @@ ], "size_in_bytes": 256, "op_tensor_size": 256, - "offset": 48643216, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_58.const", + "offset": 50867344, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_42.const", "file_size": 256 }, "NhwcConv_19_weight_NHWC": { @@ -12176,15 +10219,15 @@ "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 2785280 + 2916352 ], - "size_in_bytes": 2785280, - "op_tensor_size": 2785280, - "offset": 48643472, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_59.const", - "file_size": 2785280 + "size_in_bytes": 2916352, + "op_tensor_size": 2916352, + "offset": 50867600, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_43.const", + "file_size": 2916352 }, - "GroupNorm_17_wts_4_1_16": { + "GroupNorm_17_wts_4_0_16": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -12193,8 +10236,8 @@ ], "size_in_bytes": 2048, "op_tensor_size": 2048, - "offset": 51428752, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_60.const", + "offset": 53783952, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_44.const", "file_size": 2048 }, "NhwcConv_20_weight_NHWC": { @@ -12206,37 +10249,24 @@ ], "size_in_bytes": 163840, "op_tensor_size": 163840, - "offset": 51430800, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_61.const", + "offset": 53786000, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_45.const", "file_size": 163840 }, - "Sigmoid_16.weights6_16": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 51594640, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_62.const", - "file_size": 256 - }, "NhwcConv_21_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 1392640 + 1458176 ], - "size_in_bytes": 1392640, - "op_tensor_size": 1392640, - "offset": 51594896, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_63.const", - "file_size": 1392640 + "size_in_bytes": 1458176, + "op_tensor_size": 1458176, + "offset": 53949840, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_46.const", + "file_size": 1458176 }, - "GroupNorm_18_wts_4_1_17": { + "GroupNorm_18_wts_4_0_17": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -12245,37 +10275,24 @@ ], "size_in_bytes": 1024, "op_tensor_size": 1024, - "offset": 52987536, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_64.const", + "offset": 55408016, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_47.const", "file_size": 1024 }, - "Sigmoid_17.weights6_17": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 52988560, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_65.const", - "file_size": 256 - }, "NhwcConv_22_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 696320 + 729088 ], - "size_in_bytes": 696320, - "op_tensor_size": 696320, - "offset": 52988816, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_66.const", - "file_size": 696320 + "size_in_bytes": 729088, + "op_tensor_size": 729088, + "offset": 55409040, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_48.const", + "file_size": 729088 }, - "GroupNorm_19_wts_4_1_18": { + "GroupNorm_19_wts_4_0_18": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -12284,37 +10301,24 @@ ], "size_in_bytes": 1024, "op_tensor_size": 1024, - "offset": 53685136, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_67.const", + "offset": 56138128, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_49.const", "file_size": 1024 }, - "Sigmoid_18.weights6_18": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 53686160, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_68.const", - "file_size": 256 - }, "NhwcConv_23_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 696320 + 729088 ], - "size_in_bytes": 696320, - "op_tensor_size": 696320, - "offset": 53686416, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_69.const", - "file_size": 696320 + "size_in_bytes": 729088, + "op_tensor_size": 729088, + "offset": 56139152, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_50.const", + "file_size": 729088 }, - "GroupNorm_20_wts_4_1_19": { + "GroupNorm_20_wts_4_0_19": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -12323,37 +10327,24 @@ ], "size_in_bytes": 1024, "op_tensor_size": 1024, - "offset": 54382736, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_70.const", + "offset": 56868240, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_51.const", "file_size": 1024 }, - "Sigmoid_19.weights6_19": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 54383760, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_71.const", - "file_size": 256 - }, "NhwcConv_24_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 696320 + 729088 ], - "size_in_bytes": 696320, - "op_tensor_size": 696320, - "offset": 54384016, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_72.const", - "file_size": 696320 + "size_in_bytes": 729088, + "op_tensor_size": 729088, + "offset": 56869264, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_52.const", + "file_size": 729088 }, - "GroupNorm_21_wts_4_1_20": { + "GroupNorm_21_wts_4_0_20": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -12362,37 +10353,24 @@ ], "size_in_bytes": 1024, "op_tensor_size": 1024, - "offset": 55080336, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_73.const", + "offset": 57598352, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_53.const", "file_size": 1024 }, - "Sigmoid_20.weights6_20": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 55081360, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_74.const", - "file_size": 256 - }, "NhwcConv_25_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 696320 + 729088 ], - "size_in_bytes": 696320, - "op_tensor_size": 696320, - "offset": 55081616, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_75.const", - "file_size": 696320 + "size_in_bytes": 729088, + "op_tensor_size": 729088, + "offset": 57599376, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_54.const", + "file_size": 729088 }, - "GroupNorm_22_wts_4_1_21": { + "GroupNorm_22_wts_4_0_21": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -12401,35 +10379,22 @@ ], "size_in_bytes": 1024, "op_tensor_size": 1024, - "offset": 55777936, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_76.const", + "offset": 58328464, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_55.const", "file_size": 1024 }, - "Sigmoid_21.weights6_21": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 55778960, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_77.const", - "file_size": 256 - }, "NhwcConv_26_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 696320 + 729088 ], - "size_in_bytes": 696320, - "op_tensor_size": 696320, - "offset": 55779216, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_78.const", - "file_size": 696320 + "size_in_bytes": 729088, + "op_tensor_size": 729088, + "offset": 58329488, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_56.const", + "file_size": 729088 }, "/decoder/up_blocks.2/upsamplers.0/Resize.weights7_2": { "packed_buffer_label": "const", @@ -12440,8 +10405,8 @@ ], "size_in_bytes": 256, "op_tensor_size": 256, - "offset": 56475536, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_79.const", + "offset": 59058576, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_57.const", "file_size": 256 }, "NhwcConv_27_weight_NHWC": { @@ -12449,15 +10414,15 @@ "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 696320 + 729088 ], - "size_in_bytes": 696320, - "op_tensor_size": 696320, - "offset": 56475792, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_80.const", - "file_size": 696320 + "size_in_bytes": 729088, + "op_tensor_size": 729088, + "offset": 59058832, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_58.const", + "file_size": 729088 }, - "GroupNorm_23_wts_4_1_22": { + "GroupNorm_23_wts_4_0_22": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -12466,8 +10431,8 @@ ], "size_in_bytes": 1024, "op_tensor_size": 1024, - "offset": 57172112, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_81.const", + "offset": 59787920, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_59.const", "file_size": 1024 }, "NhwcConv_28_weight_NHWC": { @@ -12479,37 +10444,24 @@ ], "size_in_bytes": 40960, "op_tensor_size": 40960, - "offset": 57173136, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_82.const", + "offset": 59788944, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_60.const", "file_size": 40960 }, - "Sigmoid_22.weights6_22": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 57214096, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_83.const", - "file_size": 256 - }, "NhwcConv_29_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfp16ebs8", "shape": [ - 348160 + 364544 ], - "size_in_bytes": 348160, - "op_tensor_size": 348160, - "offset": 57214352, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_84.const", - "file_size": 348160 + "size_in_bytes": 364544, + "op_tensor_size": 364544, + "offset": 59829904, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_61.const", + "file_size": 364544 }, - "GroupNorm_24_wts_4_1_23": { + "GroupNorm_24_wts_4_0_23": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -12518,23 +10470,10 @@ ], "size_in_bytes": 512, "op_tensor_size": 512, - "offset": 57562512, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_85.const", + "offset": 60194448, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_62.const", "file_size": 512 }, - "Sigmoid_23.weights6_23": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 57563024, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_86.const", - "file_size": 256 - }, "NhwcConv_30_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, @@ -12544,11 +10483,11 @@ ], "size_in_bytes": 174080, "op_tensor_size": 174080, - "offset": 57563280, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_87.const", + "offset": 60194960, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_63.const", "file_size": 174080 }, - "GroupNorm_25_wts_4_1_24": { + "GroupNorm_25_wts_4_0_24": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -12557,23 +10496,10 @@ ], "size_in_bytes": 512, "op_tensor_size": 512, - "offset": 57737360, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_88.const", + "offset": 60369040, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_64.const", "file_size": 512 }, - "Sigmoid_24.weights6_24": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 57737872, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_89.const", - "file_size": 256 - }, "NhwcConv_31_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, @@ -12583,11 +10509,11 @@ ], "size_in_bytes": 174080, "op_tensor_size": 174080, - "offset": 57738128, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_90.const", + "offset": 60369552, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_65.const", "file_size": 174080 }, - "GroupNorm_26_wts_4_1_25": { + "GroupNorm_26_wts_4_0_25": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -12596,23 +10522,10 @@ ], "size_in_bytes": 512, "op_tensor_size": 512, - "offset": 57912208, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_91.const", + "offset": 60543632, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_66.const", "file_size": 512 }, - "Sigmoid_25.weights6_25": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 57912720, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_92.const", - "file_size": 256 - }, "NhwcConv_32_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, @@ -12622,11 +10535,11 @@ ], "size_in_bytes": 174080, "op_tensor_size": 174080, - "offset": 57912976, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_93.const", + "offset": 60544144, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_67.const", "file_size": 174080 }, - "GroupNorm_27_wts_4_1_26": { + "GroupNorm_27_wts_4_0_26": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -12635,23 +10548,10 @@ ], "size_in_bytes": 512, "op_tensor_size": 512, - "offset": 58087056, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_94.const", + "offset": 60718224, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_68.const", "file_size": 512 }, - "Sigmoid_26.weights6_26": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 58087568, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_95.const", - "file_size": 256 - }, "NhwcConv_33_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, @@ -12661,11 +10561,11 @@ ], "size_in_bytes": 174080, "op_tensor_size": 174080, - "offset": 58087824, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_96.const", + "offset": 60718736, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_69.const", "file_size": 174080 }, - "GroupNorm_28_wts_4_1_27": { + "GroupNorm_28_wts_4_0_27": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -12674,23 +10574,10 @@ ], "size_in_bytes": 512, "op_tensor_size": 512, - "offset": 58261904, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_97.const", + "offset": 60892816, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_70.const", "file_size": 512 }, - "Sigmoid_27.weights6_27": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 58262416, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_98.const", - "file_size": 256 - }, "NhwcConv_34_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, @@ -12700,11 +10587,11 @@ ], "size_in_bytes": 174080, "op_tensor_size": 174080, - "offset": 58262672, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_99.const", + "offset": 60893328, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_71.const", "file_size": 174080 }, - "GroupNorm_29_wts_4_1_28": { + "GroupNorm_29_wts_4_0_28": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", @@ -12713,23 +10600,10 @@ ], "size_in_bytes": 512, "op_tensor_size": 512, - "offset": 58436752, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_100.const", + "offset": 61067408, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_72.const", "file_size": 512 }, - "Sigmoid_28.weights6_28": { - "packed_buffer_label": "const", - "xrt_arg_id": 3, - "dtype": "bfloat16", - "shape": [ - 128 - ], - "size_in_bytes": 256, - "op_tensor_size": 256, - "offset": 58437264, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_101.const", - "file_size": 256 - }, "NhwcConv_35_weight_NHWC": { "packed_buffer_label": "const", "xrt_arg_id": 3, @@ -12739,8 +10613,8 @@ ], "size_in_bytes": 21760, "op_tensor_size": 21760, - "offset": 58437520, - "file_name": ".cache\\NhwcConv_0-post_quant_convConv_102.const", + "offset": 61067920, + "file_name": ".cache/NhwcConv_0-post_quant_convConv_73.const", "file_size": 21760 } },