Instructions to use stabilityai/stable-diffusion-3-medium-amdnpu with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusion Single File
How to use stabilityai/stable-diffusion-3-medium-amdnpu with Diffusion Single File:
# No code snippets available yet for this library. # To use this model, check the repository files and the library's documentation. # Want to help? PRs adding snippets are welcome at: # https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
stable-diffusion-3-medium-amdnpu / normal /controlnet-pose /dynamic /dd /cache /pos_embedprojConv_meta.json
| { | |
| "dd_meta_major_version": 1, | |
| "dd_meta_minor_version": 4, | |
| "state_table_updates": [], | |
| "op_list": [ | |
| { | |
| "name": "/pos_embed/proj/Conv", | |
| "type": "SDConv", | |
| "in_args": [ | |
| "hidden_states_nhwc.out5_0_0" | |
| ], | |
| "const_args": [ | |
| "pos_embed.proj.weight" | |
| ], | |
| "out_args": [ | |
| "/pos_embed/Transpose_output_0.out5_0_0" | |
| ], | |
| "attrs": { | |
| "auto_pad": { | |
| "type": "str", | |
| "value": [ | |
| "NOTSET" | |
| ] | |
| }, | |
| "dilations": { | |
| "type": "int", | |
| "value": [ | |
| "1", | |
| "1" | |
| ] | |
| }, | |
| "group": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "kernel_shape": { | |
| "type": "int", | |
| "value": [ | |
| "2", | |
| "2" | |
| ] | |
| }, | |
| "pads": { | |
| "type": "int", | |
| "value": [ | |
| "0", | |
| "0", | |
| "0", | |
| "0" | |
| ] | |
| }, | |
| "strides": { | |
| "type": "int", | |
| "value": [ | |
| "2", | |
| "2" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "w", | |
| "h", | |
| "16" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(w/2)", | |
| "floor(h/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "2", | |
| "2", | |
| "16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "float" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/pos_embed/Add_2", | |
| "type": "SDAdd", | |
| "in_args": [ | |
| "/pos_embed/Transpose_output_0.out5_0_0", | |
| "/pos_embed/Reshape_1_output_0.out_35_1_2" | |
| ], | |
| "const_args": [], | |
| "out_args": [ | |
| "/pos_embed/Add_2_output_0.out_35_1_2" | |
| ], | |
| "attrs": { | |
| "a_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "b_shape": { | |
| "type": "str", | |
| "value": [ | |
| "1", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "c_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "is_bias_add": { | |
| "type": "int", | |
| "value": [ | |
| "0" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/pos_embed_input/proj/Conv", | |
| "type": "SDConv", | |
| "in_args": [ | |
| "controlnet_cond_nhwc.out5_0_1" | |
| ], | |
| "const_args": [ | |
| "pos_embed_input.proj.weight" | |
| ], | |
| "out_args": [ | |
| "/pos_embed_input/Transpose_output_0.out5_0_1" | |
| ], | |
| "attrs": { | |
| "auto_pad": { | |
| "type": "str", | |
| "value": [ | |
| "NOTSET" | |
| ] | |
| }, | |
| "dilations": { | |
| "type": "int", | |
| "value": [ | |
| "1", | |
| "1" | |
| ] | |
| }, | |
| "group": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "kernel_shape": { | |
| "type": "int", | |
| "value": [ | |
| "2", | |
| "2" | |
| ] | |
| }, | |
| "pads": { | |
| "type": "int", | |
| "value": [ | |
| "0", | |
| "0", | |
| "0", | |
| "0" | |
| ] | |
| }, | |
| "strides": { | |
| "type": "int", | |
| "value": [ | |
| "2", | |
| "2" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "w", | |
| "h", | |
| "16" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(w/2)", | |
| "floor(h/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "2", | |
| "2", | |
| "16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "float" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/Add", | |
| "type": "SDAdd", | |
| "in_args": [ | |
| "/pos_embed/Add_2_output_0.out_35_1_2", | |
| "/pos_embed_input/Transpose_output_0.out5_0_1" | |
| ], | |
| "const_args": [], | |
| "out_args": [ | |
| "/Add_output_0.out_35_1_3" | |
| ], | |
| "attrs": { | |
| "a_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "b_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "c_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "is_bias_add": { | |
| "type": "int", | |
| "value": [ | |
| "0" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/time_text_embed/timestep_embedder/linear_1/Gemm", | |
| "type": "SDGemm", | |
| "in_args": [ | |
| "/time_text_embed/Cast_output_0.out17_3_3" | |
| ], | |
| "const_args": [ | |
| "time_text_embed.timestep_embedder.linear_1.weight_5_1_2" | |
| ], | |
| "out_args": [ | |
| "/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "256" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "256", | |
| "1536" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/time_text_embed/timestep_embedder/act/Sigmoid", | |
| "type": "SDSilu", | |
| "in_args": [ | |
| "/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3" | |
| ], | |
| "const_args": [ | |
| "/time_text_embed/timestep_embedder/act/Sigmoid.weights2_1" | |
| ], | |
| "out_args": [ | |
| "/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "128" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/time_text_embed/timestep_embedder/linear_2/Gemm", | |
| "type": "SDGemm", | |
| "in_args": [ | |
| "/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1" | |
| ], | |
| "const_args": [ | |
| "time_text_embed.timestep_embedder.linear_2.weight_5_1_3" | |
| ], | |
| "out_args": [ | |
| "/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/time_text_embed/text_embedder/linear_1/Gemm", | |
| "type": "SDGemm", | |
| "in_args": [ | |
| "pooled_projections.out17_3_1" | |
| ], | |
| "const_args": [ | |
| "time_text_embed.text_embedder.linear_1.weight_5_1_0" | |
| ], | |
| "out_args": [ | |
| "/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "2048" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "2048", | |
| "1536" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/time_text_embed/text_embedder/act_1/Sigmoid", | |
| "type": "SDSilu", | |
| "in_args": [ | |
| "/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1" | |
| ], | |
| "const_args": [ | |
| "/time_text_embed/text_embedder/act_1/Sigmoid.weights2_0" | |
| ], | |
| "out_args": [ | |
| "/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "128" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/time_text_embed/text_embedder/linear_2/Gemm", | |
| "type": "SDGemm", | |
| "in_args": [ | |
| "/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0" | |
| ], | |
| "const_args": [ | |
| "time_text_embed.text_embedder.linear_2.weight_5_1_1" | |
| ], | |
| "out_args": [ | |
| "/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/time_text_embed/Add", | |
| "type": "SDAdd", | |
| "in_args": [ | |
| "/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4", | |
| "/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2" | |
| ], | |
| "const_args": [], | |
| "out_args": [ | |
| "/time_text_embed/Add_output_0.out_35_1_4" | |
| ], | |
| "attrs": { | |
| "a_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1536" | |
| ] | |
| }, | |
| "b_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1536" | |
| ] | |
| }, | |
| "c_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "is_bias_add": { | |
| "type": "int", | |
| "value": [ | |
| "0" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/norm1/silu/Sigmoid", | |
| "type": "SDSilu", | |
| "in_args": [ | |
| "/time_text_embed/Add_output_0.out_35_1_4" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid.weights2_2" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "128" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "encoder_hidden_states.out17_3_0_SDCastBf2Bfp", | |
| "type": "SDCastBf2Bfp", | |
| "in_args": [ | |
| "encoder_hidden_states.out17_3_0" | |
| ], | |
| "const_args": [ | |
| "encoder_hidden_states.out17_3_0_bfp.wts" | |
| ], | |
| "out_args": [ | |
| "encoder_hidden_states.out17_3_0_bfp.out25_0" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "4096" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "4096" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/context_embedder/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "encoder_hidden_states.out17_3_0_bfp.out25_0" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1943" | |
| ], | |
| "out_args": [ | |
| "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "4096" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "4096", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_SDCastBfp2Bf", | |
| "type": "SDCastBfp2Bf", | |
| "in_args": [ | |
| "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0" | |
| ], | |
| "const_args": [ | |
| "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_bfp.wts" | |
| ], | |
| "out_args": [ | |
| "/context_embedder/Add_output_0.out17_3_0" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/norm1_context/norm/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/context_embedder/Add_output_0.out17_3_0" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.0/norm1_context/norm/Constant_output_0", | |
| "/transformer_blocks.0/norm1_context/norm/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/norm1_context/Add_2", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_1_transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/norm1/norm/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/Add_output_0.out_35_1_3" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.0/norm1/norm/Constant_output_0", | |
| "/transformer_blocks.0/norm1/norm/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/norm1/Add_2", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_4_transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_3" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_22" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/attn/Concat", | |
| "type": "SDGemmConcat_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0", | |
| "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_22" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1947_onnx::MatMul_1944" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/attn/Concat_output_0.out22_0_bfp.out23_0" | |
| ], | |
| "attrs": { | |
| "trans_head": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "head_num": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "concat_axis": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/attn/Concat_1", | |
| "type": "SDGemmConcat_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0", | |
| "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_22" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1948_onnx::MatMul_1945" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/attn/Concat_1_output_0.out22_0_bfp.out23_1" | |
| ], | |
| "attrs": { | |
| "trans_head": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "head_num": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "concat_axis": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/attn/Concat_2", | |
| "type": "SDGemmConcat_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0", | |
| "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_22" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1949_onnx::MatMul_1946" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/attn/Concat_2_output_0.out22_0_bfp.out23_2" | |
| ], | |
| "attrs": { | |
| "head_num": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "concat_axis": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "trans_head": { | |
| "type": "int", | |
| "value": [ | |
| "3" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/attn/MatMulmha_18_0_0", | |
| "type": "SDMHA_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/attn/Concat_output_0.out22_0_bfp.out23_0", | |
| "/transformer_blocks.0/attn/Concat_1_output_0.out22_0_bfp.out23_1", | |
| "/transformer_blocks.0/attn/Concat_2_output_0.out22_0_bfp.out23_2" | |
| ], | |
| "const_args": [], | |
| "out_args": [ | |
| "/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0" | |
| ], | |
| "attrs": { | |
| "num_heads": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "unidirectional": { | |
| "type": "int", | |
| "value": [ | |
| "0" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "64", | |
| "max_length + floor(h/2)*floor(w/2)" | |
| ] | |
| }, | |
| "op_version": { | |
| "type": "str", | |
| "value": [ | |
| "v2" | |
| ] | |
| }, | |
| "is_flash_mha": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/attn/to_add_out/MatMul", | |
| "type": "SDSliceGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1964" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1" | |
| ], | |
| "attrs": { | |
| "axes": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "slice_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "start": { | |
| "type": "str", | |
| "value": [ | |
| "floor(h/2)*floor(w/2)" | |
| ] | |
| }, | |
| "end": { | |
| "type": "int", | |
| "value": [ | |
| "2147483647" | |
| ] | |
| }, | |
| "step": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/Add_4", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1", | |
| "/context_embedder/Add_output_0.out17_3_0" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_4_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/Add_4_output_0.out10_0" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/norm2_context/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/Add_4_output_0.out10_0" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.0/norm2_context/Constant_output_0", | |
| "/transformer_blocks.0/norm2_context/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/Add_6", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_3_transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_2" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/Add_6_output_0.out0_0_3_bfp.out1_1" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/attn/to_out.0/MatMul", | |
| "type": "SDSliceGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1963" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0" | |
| ], | |
| "attrs": { | |
| "axes": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "slice_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "start": { | |
| "type": "int", | |
| "value": [ | |
| "0" | |
| ] | |
| }, | |
| "end": { | |
| "type": "str", | |
| "value": [ | |
| "floor(h/2)*floor(w/2)" | |
| ] | |
| }, | |
| "step": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/ff_context/net.0/proj/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/Add_6_output_0.out0_0_3_bfp.out1_1" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1967" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_6" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "6144" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "6144" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "nonlinear": { | |
| "type": "str", | |
| "value": [ | |
| "Gelu" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/ff_context/net.2/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_6" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1968" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_12_bfp.out25_8" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "6144" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "6144", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/Add_7", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_12_bfp.out25_8", | |
| "/transformer_blocks.0/Add_4_output_0.out10_0" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_5_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/Add_7_output_0.out10_1" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/Add", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0", | |
| "/Add_output_0.out_35_1_3" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_2_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/Add_output_0.out10_20" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/norm2/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/Add_output_0.out10_20" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.0/norm2/Constant_output_0", | |
| "/transformer_blocks.0/norm2/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/Add_2", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_1_transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/Add_2_output_0.out0_0_2_bfp.out1_21" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/ff/net.0/proj/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/Add_2_output_0.out0_0_2_bfp.out1_21" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1965" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_5" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "6144" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "6144" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "nonlinear": { | |
| "type": "str", | |
| "value": [ | |
| "Gelu" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/ff/net.2/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_5" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1966" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_11_bfp.out25_7" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "6144" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "6144", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.0/Add_3", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_11_bfp.out25_7", | |
| "/transformer_blocks.0/Add_output_0.out10_20" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_5_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.0/Add_3_output_0.out10_21" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/norm1/norm/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/Add_3_output_0.out10_21" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.1/norm1/norm/Constant_output_0", | |
| "/transformer_blocks.1/norm1/norm/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_4_bfp.out15_4" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/norm1/Add_2", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_4_bfp.out15_4" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_0_transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_1" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/controlnet_blocks.0/MatMul", | |
| "type": "SDGemm", | |
| "in_args": [ | |
| "/transformer_blocks.0/Add_3_output_0.out10_21" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2088" | |
| ], | |
| "out_args": [ | |
| "/controlnet_blocks.0/Add_output_0.out17_3_13" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/norm1_context/norm/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/Add_7_output_0.out10_1" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.1/norm1_context/norm/Constant_output_0", | |
| "/transformer_blocks.1/norm1_context/norm/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_5_bfp.out15_5" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/norm1_context/Add_2", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_5_bfp.out15_5" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_0_transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_1" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/attn/Concat", | |
| "type": "SDGemmConcat_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4", | |
| "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1972_onnx::MatMul_1969" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/attn/Concat_output_0.out22_1_bfp.out23_3" | |
| ], | |
| "attrs": { | |
| "trans_head": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "head_num": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "concat_axis": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/attn/Concat_1", | |
| "type": "SDGemmConcat_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4", | |
| "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1973_onnx::MatMul_1970" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/attn/Concat_1_output_0.out22_1_bfp.out23_4" | |
| ], | |
| "attrs": { | |
| "trans_head": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "head_num": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "concat_axis": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/attn/Concat_2", | |
| "type": "SDGemmConcat_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4", | |
| "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1974_onnx::MatMul_1971" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/attn/Concat_2_output_0.out22_1_bfp.out23_5" | |
| ], | |
| "attrs": { | |
| "head_num": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "concat_axis": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "trans_head": { | |
| "type": "int", | |
| "value": [ | |
| "3" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/attn/MatMulmha_18_0_1", | |
| "type": "SDMHA_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.1/attn/Concat_output_0.out22_1_bfp.out23_3", | |
| "/transformer_blocks.1/attn/Concat_1_output_0.out22_1_bfp.out23_4", | |
| "/transformer_blocks.1/attn/Concat_2_output_0.out22_1_bfp.out23_5" | |
| ], | |
| "const_args": [], | |
| "out_args": [ | |
| "/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1" | |
| ], | |
| "attrs": { | |
| "num_heads": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "unidirectional": { | |
| "type": "int", | |
| "value": [ | |
| "0" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "64", | |
| "max_length + floor(h/2)*floor(w/2)" | |
| ] | |
| }, | |
| "op_version": { | |
| "type": "str", | |
| "value": [ | |
| "v2" | |
| ] | |
| }, | |
| "is_flash_mha": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/attn/to_out.0/MatMul", | |
| "type": "SDSliceGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1988" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2" | |
| ], | |
| "attrs": { | |
| "axes": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "slice_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "start": { | |
| "type": "int", | |
| "value": [ | |
| "0" | |
| ] | |
| }, | |
| "end": { | |
| "type": "str", | |
| "value": [ | |
| "floor(h/2)*floor(w/2)" | |
| ] | |
| }, | |
| "step": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/Add", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2", | |
| "/transformer_blocks.0/Add_3_output_0.out10_21" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_2_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/Add_output_0.out10_2" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/norm2/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.1/Add_output_0.out10_2" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.1/norm2/Constant_output_0", | |
| "/transformer_blocks.1/norm2/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/Add_2", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_3_transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_4" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/Add_2_output_0.out0_0_6_bfp.out1_3" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/ff/net.0/proj/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.1/Add_2_output_0.out0_0_6_bfp.out1_3" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1990" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_10" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "6144" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "6144" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "nonlinear": { | |
| "type": "str", | |
| "value": [ | |
| "Gelu" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/ff/net.2/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_10" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1991" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_20_bfp.out25_12" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "6144" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "6144", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/Add_3", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_20_bfp.out25_12", | |
| "/transformer_blocks.1/Add_output_0.out10_2" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_5_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/Add_3_output_0.out10_3" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/controlnet_blocks.1/MatMul", | |
| "type": "SDGemm", | |
| "in_args": [ | |
| "/transformer_blocks.1/Add_3_output_0.out10_3" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2089" | |
| ], | |
| "out_args": [ | |
| "/controlnet_blocks.1/Add_output_0.out17_3_22" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/attn/to_add_out/MatMul", | |
| "type": "SDSliceGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1989" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3" | |
| ], | |
| "attrs": { | |
| "axes": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "slice_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "start": { | |
| "type": "str", | |
| "value": [ | |
| "floor(h/2)*floor(w/2)" | |
| ] | |
| }, | |
| "end": { | |
| "type": "int", | |
| "value": [ | |
| "2147483647" | |
| ] | |
| }, | |
| "step": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/Add_4", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3", | |
| "/transformer_blocks.0/Add_7_output_0.out10_1" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_2_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/Add_4_output_0.out10_4" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/norm2_context/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.1/Add_4_output_0.out10_4" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.1/norm2_context/Constant_output_0", | |
| "/transformer_blocks.1/norm2_context/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/Add_6", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_3_transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_4" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/Add_6_output_0.out0_0_7_bfp.out1_5" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/ff_context/net.0/proj/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.1/Add_6_output_0.out0_0_7_bfp.out1_5" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1992" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_11" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "6144" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "6144" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "nonlinear": { | |
| "type": "str", | |
| "value": [ | |
| "Gelu" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/ff_context/net.2/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_11" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1993" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_21_bfp.out25_13" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "6144" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "6144", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.1/Add_7", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_21_bfp.out25_13", | |
| "/transformer_blocks.1/Add_4_output_0.out10_4" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_5_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.1/Add_7_output_0.out10_5" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/norm1/norm/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.1/Add_3_output_0.out10_3" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.2/norm1/norm/Constant_output_0", | |
| "/transformer_blocks.2/norm1/norm/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_8_bfp.out15_8" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/norm1/Add_2", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_8_bfp.out15_8" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_0_transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_1" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_6" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/norm1_context/norm/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.1/Add_7_output_0.out10_5" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.2/norm1_context/norm/Constant_output_0", | |
| "/transformer_blocks.2/norm1_context/norm/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_9_bfp.out15_9" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/norm1_context/Add_2", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_9_bfp.out15_9" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_0_transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_1" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_8" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/attn/Concat", | |
| "type": "SDGemmConcat_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_8", | |
| "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_6" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1997_onnx::MatMul_1994" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/attn/Concat_output_0.out22_2_bfp.out23_6" | |
| ], | |
| "attrs": { | |
| "trans_head": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "head_num": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "concat_axis": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/attn/Concat_1", | |
| "type": "SDGemmConcat_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_8", | |
| "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_6" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1998_onnx::MatMul_1995" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/attn/Concat_1_output_0.out22_2_bfp.out23_7" | |
| ], | |
| "attrs": { | |
| "trans_head": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "head_num": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "concat_axis": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/attn/Concat_2", | |
| "type": "SDGemmConcat_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_8", | |
| "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_6" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_1999_onnx::MatMul_1996" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/attn/Concat_2_output_0.out22_2_bfp.out23_8" | |
| ], | |
| "attrs": { | |
| "head_num": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "concat_axis": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "trans_head": { | |
| "type": "int", | |
| "value": [ | |
| "3" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/attn/MatMulmha_18_0_2", | |
| "type": "SDMHA_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.2/attn/Concat_output_0.out22_2_bfp.out23_6", | |
| "/transformer_blocks.2/attn/Concat_1_output_0.out22_2_bfp.out23_7", | |
| "/transformer_blocks.2/attn/Concat_2_output_0.out22_2_bfp.out23_8" | |
| ], | |
| "const_args": [], | |
| "out_args": [ | |
| "/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2" | |
| ], | |
| "attrs": { | |
| "num_heads": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "unidirectional": { | |
| "type": "int", | |
| "value": [ | |
| "0" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "64", | |
| "max_length + floor(h/2)*floor(w/2)" | |
| ] | |
| }, | |
| "op_version": { | |
| "type": "str", | |
| "value": [ | |
| "v2" | |
| ] | |
| }, | |
| "is_flash_mha": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/attn/to_out.0/MatMul", | |
| "type": "SDSliceGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2013" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4" | |
| ], | |
| "attrs": { | |
| "axes": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "slice_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "start": { | |
| "type": "int", | |
| "value": [ | |
| "0" | |
| ] | |
| }, | |
| "end": { | |
| "type": "str", | |
| "value": [ | |
| "floor(h/2)*floor(w/2)" | |
| ] | |
| }, | |
| "step": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/Add", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4", | |
| "/transformer_blocks.1/Add_3_output_0.out10_3" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_2_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/Add_output_0.out10_6" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/norm2/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.2/Add_output_0.out10_6" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.2/norm2/Constant_output_0", | |
| "/transformer_blocks.2/norm2/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/Add_2", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_3_transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_4" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/Add_2_output_0.out0_0_10_bfp.out1_7" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/ff/net.0/proj/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.2/Add_2_output_0.out0_0_10_bfp.out1_7" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2015" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_15" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "6144" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "6144" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "nonlinear": { | |
| "type": "str", | |
| "value": [ | |
| "Gelu" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/ff/net.2/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_15" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2016" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_29_bfp.out25_17" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "6144" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "6144", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/Add_3", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_29_bfp.out25_17", | |
| "/transformer_blocks.2/Add_output_0.out10_6" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_5_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/Add_3_output_0.out10_7" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/controlnet_blocks.2/MatMul", | |
| "type": "SDGemm", | |
| "in_args": [ | |
| "/transformer_blocks.2/Add_3_output_0.out10_7" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2090" | |
| ], | |
| "out_args": [ | |
| "/controlnet_blocks.2/Add_output_0.out17_3_31" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/attn/to_add_out/MatMul", | |
| "type": "SDSliceGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2014" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5" | |
| ], | |
| "attrs": { | |
| "axes": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "slice_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "start": { | |
| "type": "str", | |
| "value": [ | |
| "floor(h/2)*floor(w/2)" | |
| ] | |
| }, | |
| "end": { | |
| "type": "int", | |
| "value": [ | |
| "2147483647" | |
| ] | |
| }, | |
| "step": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/Add_4", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5", | |
| "/transformer_blocks.1/Add_7_output_0.out10_5" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_2_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/Add_4_output_0.out10_8" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/norm2_context/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.2/Add_4_output_0.out10_8" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.2/norm2_context/Constant_output_0", | |
| "/transformer_blocks.2/norm2_context/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/Add_6", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_3_transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_4" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/Add_6_output_0.out0_0_11_bfp.out1_9" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/ff_context/net.0/proj/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.2/Add_6_output_0.out0_0_11_bfp.out1_9" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2017" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_16" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "6144" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "6144" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "nonlinear": { | |
| "type": "str", | |
| "value": [ | |
| "Gelu" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/ff_context/net.2/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_16" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2018" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_30_bfp.out25_18" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "6144" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "6144", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.2/Add_7", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_30_bfp.out25_18", | |
| "/transformer_blocks.2/Add_4_output_0.out10_8" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_5_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.2/Add_7_output_0.out10_9" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/norm1/norm/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.2/Add_3_output_0.out10_7" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.3/norm1/norm/Constant_output_0", | |
| "/transformer_blocks.3/norm1/norm/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_12_bfp.out15_12" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/norm1/Add_2", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_12_bfp.out15_12" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_0_transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_1" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_10" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/norm1_context/norm/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.2/Add_7_output_0.out10_9" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.3/norm1_context/norm/Constant_output_0", | |
| "/transformer_blocks.3/norm1_context/norm/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_13_bfp.out15_13" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/norm1_context/Add_2", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_13_bfp.out15_13" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_0_transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_1" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_12" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/attn/Concat", | |
| "type": "SDGemmConcat_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_12", | |
| "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_10" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2022_onnx::MatMul_2019" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/attn/Concat_output_0.out22_3_bfp.out23_9" | |
| ], | |
| "attrs": { | |
| "trans_head": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "head_num": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "concat_axis": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/attn/Concat_1", | |
| "type": "SDGemmConcat_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_12", | |
| "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_10" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2023_onnx::MatMul_2020" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/attn/Concat_1_output_0.out22_3_bfp.out23_10" | |
| ], | |
| "attrs": { | |
| "trans_head": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "head_num": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "concat_axis": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/attn/Concat_2", | |
| "type": "SDGemmConcat_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_12", | |
| "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_10" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2024_onnx::MatMul_2021" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/attn/Concat_2_output_0.out22_3_bfp.out23_11" | |
| ], | |
| "attrs": { | |
| "head_num": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "concat_axis": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "trans_head": { | |
| "type": "int", | |
| "value": [ | |
| "3" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/attn/MatMulmha_18_0_3", | |
| "type": "SDMHA_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.3/attn/Concat_output_0.out22_3_bfp.out23_9", | |
| "/transformer_blocks.3/attn/Concat_1_output_0.out22_3_bfp.out23_10", | |
| "/transformer_blocks.3/attn/Concat_2_output_0.out22_3_bfp.out23_11" | |
| ], | |
| "const_args": [], | |
| "out_args": [ | |
| "/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3" | |
| ], | |
| "attrs": { | |
| "num_heads": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "unidirectional": { | |
| "type": "int", | |
| "value": [ | |
| "0" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "64", | |
| "max_length + floor(h/2)*floor(w/2)" | |
| ] | |
| }, | |
| "op_version": { | |
| "type": "str", | |
| "value": [ | |
| "v2" | |
| ] | |
| }, | |
| "is_flash_mha": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/attn/to_out.0/MatMul", | |
| "type": "SDSliceGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2038" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6" | |
| ], | |
| "attrs": { | |
| "axes": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "slice_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "start": { | |
| "type": "int", | |
| "value": [ | |
| "0" | |
| ] | |
| }, | |
| "end": { | |
| "type": "str", | |
| "value": [ | |
| "floor(h/2)*floor(w/2)" | |
| ] | |
| }, | |
| "step": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/Add", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6", | |
| "/transformer_blocks.2/Add_3_output_0.out10_7" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_2_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/Add_output_0.out10_10" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/norm2/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.3/Add_output_0.out10_10" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.3/norm2/Constant_output_0", | |
| "/transformer_blocks.3/norm2/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/Add_2", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_3_transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_4" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/Add_2_output_0.out0_0_14_bfp.out1_11" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/ff/net.0/proj/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.3/Add_2_output_0.out0_0_14_bfp.out1_11" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2040" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_20" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "6144" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "6144" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "nonlinear": { | |
| "type": "str", | |
| "value": [ | |
| "Gelu" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/ff/net.2/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_20" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2041" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_38_bfp.out25_22" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "6144" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "6144", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/Add_3", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_38_bfp.out25_22", | |
| "/transformer_blocks.3/Add_output_0.out10_10" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_5_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/Add_3_output_0.out10_11" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/controlnet_blocks.3/MatMul", | |
| "type": "SDGemm", | |
| "in_args": [ | |
| "/transformer_blocks.3/Add_3_output_0.out10_11" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2091" | |
| ], | |
| "out_args": [ | |
| "/controlnet_blocks.3/Add_output_0.out17_3_40" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/attn/to_add_out/MatMul", | |
| "type": "SDSliceGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2039" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7" | |
| ], | |
| "attrs": { | |
| "axes": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "slice_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "start": { | |
| "type": "str", | |
| "value": [ | |
| "floor(h/2)*floor(w/2)" | |
| ] | |
| }, | |
| "end": { | |
| "type": "int", | |
| "value": [ | |
| "2147483647" | |
| ] | |
| }, | |
| "step": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/Add_4", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7", | |
| "/transformer_blocks.2/Add_7_output_0.out10_9" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_2_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/Add_4_output_0.out10_12" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/norm2_context/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.3/Add_4_output_0.out10_12" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.3/norm2_context/Constant_output_0", | |
| "/transformer_blocks.3/norm2_context/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/Add_6", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_3_transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_4" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/Add_6_output_0.out0_0_15_bfp.out1_13" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/ff_context/net.0/proj/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.3/Add_6_output_0.out0_0_15_bfp.out1_13" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2042" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_21" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "6144" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "6144" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "nonlinear": { | |
| "type": "str", | |
| "value": [ | |
| "Gelu" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/ff_context/net.2/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_21" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2043" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_39_bfp.out25_23" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "6144" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "6144", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.3/Add_7", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_39_bfp.out25_23", | |
| "/transformer_blocks.3/Add_4_output_0.out10_12" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_5_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.3/Add_7_output_0.out10_13" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/norm1/norm/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.3/Add_3_output_0.out10_11" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.4/norm1/norm/Constant_output_0", | |
| "/transformer_blocks.4/norm1/norm/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_16_bfp.out15_16" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/norm1/Add_2", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_16_bfp.out15_16" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_0_transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_1" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_14" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/norm1_context/norm/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.3/Add_7_output_0.out10_13" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.4/norm1_context/norm/Constant_output_0", | |
| "/transformer_blocks.4/norm1_context/norm/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_17_bfp.out15_17" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/norm1_context/Add_2", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_17_bfp.out15_17" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_0_transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_1" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_16" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/attn/Concat", | |
| "type": "SDGemmConcat_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_16", | |
| "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_14" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2047_onnx::MatMul_2044" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/attn/Concat_output_0.out22_4_bfp.out23_12" | |
| ], | |
| "attrs": { | |
| "trans_head": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "head_num": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "concat_axis": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/attn/Concat_1", | |
| "type": "SDGemmConcat_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_16", | |
| "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_14" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2048_onnx::MatMul_2045" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/attn/Concat_1_output_0.out22_4_bfp.out23_13" | |
| ], | |
| "attrs": { | |
| "trans_head": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "head_num": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "concat_axis": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/attn/Concat_2", | |
| "type": "SDGemmConcat_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_16", | |
| "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_14" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2049_onnx::MatMul_2046" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/attn/Concat_2_output_0.out22_4_bfp.out23_14" | |
| ], | |
| "attrs": { | |
| "head_num": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "concat_axis": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "trans_head": { | |
| "type": "int", | |
| "value": [ | |
| "3" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/attn/MatMulmha_18_0_4", | |
| "type": "SDMHA_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.4/attn/Concat_output_0.out22_4_bfp.out23_12", | |
| "/transformer_blocks.4/attn/Concat_1_output_0.out22_4_bfp.out23_13", | |
| "/transformer_blocks.4/attn/Concat_2_output_0.out22_4_bfp.out23_14" | |
| ], | |
| "const_args": [], | |
| "out_args": [ | |
| "/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4" | |
| ], | |
| "attrs": { | |
| "num_heads": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "unidirectional": { | |
| "type": "int", | |
| "value": [ | |
| "0" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "64", | |
| "max_length + floor(h/2)*floor(w/2)" | |
| ] | |
| }, | |
| "op_version": { | |
| "type": "str", | |
| "value": [ | |
| "v2" | |
| ] | |
| }, | |
| "is_flash_mha": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/attn/to_out.0/MatMul", | |
| "type": "SDSliceGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2063" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8" | |
| ], | |
| "attrs": { | |
| "axes": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "slice_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "start": { | |
| "type": "int", | |
| "value": [ | |
| "0" | |
| ] | |
| }, | |
| "end": { | |
| "type": "str", | |
| "value": [ | |
| "floor(h/2)*floor(w/2)" | |
| ] | |
| }, | |
| "step": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/Add", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8", | |
| "/transformer_blocks.3/Add_3_output_0.out10_11" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_2_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/Add_output_0.out10_14" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/norm2/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.4/Add_output_0.out10_14" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.4/norm2/Constant_output_0", | |
| "/transformer_blocks.4/norm2/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/Add_2", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_3_transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_4" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/Add_2_output_0.out0_0_18_bfp.out1_15" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/ff/net.0/proj/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.4/Add_2_output_0.out0_0_18_bfp.out1_15" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2065" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_25" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "6144" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "6144" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "nonlinear": { | |
| "type": "str", | |
| "value": [ | |
| "Gelu" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/ff/net.2/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_25" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2066" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_47_bfp.out25_27" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "6144" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "6144", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/Add_3", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_47_bfp.out25_27", | |
| "/transformer_blocks.4/Add_output_0.out10_14" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_5_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/Add_3_output_0.out10_15" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/controlnet_blocks.4/MatMul", | |
| "type": "SDGemm", | |
| "in_args": [ | |
| "/transformer_blocks.4/Add_3_output_0.out10_15" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2092" | |
| ], | |
| "out_args": [ | |
| "/controlnet_blocks.4/Add_output_0.out17_3_49" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/attn/to_add_out/MatMul", | |
| "type": "SDSliceGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2064" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9" | |
| ], | |
| "attrs": { | |
| "axes": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "slice_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "start": { | |
| "type": "str", | |
| "value": [ | |
| "floor(h/2)*floor(w/2)" | |
| ] | |
| }, | |
| "end": { | |
| "type": "int", | |
| "value": [ | |
| "2147483647" | |
| ] | |
| }, | |
| "step": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/Add_4", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9", | |
| "/transformer_blocks.3/Add_7_output_0.out10_13" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_2_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/Add_4_output_0.out10_16" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/norm2_context/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.4/Add_4_output_0.out10_16" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.4/norm2_context/Constant_output_0", | |
| "/transformer_blocks.4/norm2_context/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/Add_6", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_3_transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_4" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/Add_6_output_0.out0_0_19_bfp.out1_17" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/ff_context/net.0/proj/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.4/Add_6_output_0.out0_0_19_bfp.out1_17" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2067" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_26" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "6144" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "6144" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "nonlinear": { | |
| "type": "str", | |
| "value": [ | |
| "Gelu" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/ff_context/net.2/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_26" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2068" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_48_bfp.out25_28" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "6144" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "6144", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.4/Add_7", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_48_bfp.out25_28", | |
| "/transformer_blocks.4/Add_4_output_0.out10_16" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_5_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.4/Add_7_output_0.out10_17" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.5/norm1/norm/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.4/Add_3_output_0.out10_15" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.5/norm1/norm/Constant_output_0", | |
| "/transformer_blocks.5/norm1/norm/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_20_bfp.out15_20" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.5/norm1/Add_2", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_20_bfp.out15_20" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_0_transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_1" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_18" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.5/norm1_context/norm/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.4/Add_7_output_0.out10_17" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.5/norm1_context/norm/Constant_output_0", | |
| "/transformer_blocks.5/norm1_context/norm/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_21_bfp.out15_21" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.5/norm1_context/Add_2", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_21_bfp.out15_21" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_0_transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_1" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_20" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.5/attn/Concat", | |
| "type": "SDGemmConcat_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_20", | |
| "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_18" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2072_onnx::MatMul_2069" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.5/attn/Concat_output_0.out22_5_bfp.out23_15" | |
| ], | |
| "attrs": { | |
| "trans_head": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "head_num": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "concat_axis": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.5/attn/Concat_1", | |
| "type": "SDGemmConcat_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_20", | |
| "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_18" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2073_onnx::MatMul_2070" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.5/attn/Concat_1_output_0.out22_5_bfp.out23_16" | |
| ], | |
| "attrs": { | |
| "trans_head": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "head_num": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "concat_axis": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.5/attn/Concat_2", | |
| "type": "SDGemmConcat_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_20", | |
| "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_18" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2074_onnx::MatMul_2071" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.5/attn/Concat_2_output_0.out22_5_bfp.out23_17" | |
| ], | |
| "attrs": { | |
| "head_num": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "concat_axis": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "trans_head": { | |
| "type": "int", | |
| "value": [ | |
| "3" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.5/attn/MatMulmha_18_0_5", | |
| "type": "SDMHA_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.5/attn/Concat_output_0.out22_5_bfp.out23_15", | |
| "/transformer_blocks.5/attn/Concat_1_output_0.out22_5_bfp.out23_16", | |
| "/transformer_blocks.5/attn/Concat_2_output_0.out22_5_bfp.out23_17" | |
| ], | |
| "const_args": [], | |
| "out_args": [ | |
| "/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5" | |
| ], | |
| "attrs": { | |
| "num_heads": { | |
| "type": "int", | |
| "value": [ | |
| "24" | |
| ] | |
| }, | |
| "unidirectional": { | |
| "type": "int", | |
| "value": [ | |
| "0" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "64", | |
| "max_length + floor(h/2)*floor(w/2)" | |
| ] | |
| }, | |
| "op_version": { | |
| "type": "str", | |
| "value": [ | |
| "v2" | |
| ] | |
| }, | |
| "is_flash_mha": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.5/attn/to_out.0/MatMul", | |
| "type": "SDSliceGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2085" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10" | |
| ], | |
| "attrs": { | |
| "axes": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "slice_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "start": { | |
| "type": "int", | |
| "value": [ | |
| "0" | |
| ] | |
| }, | |
| "end": { | |
| "type": "str", | |
| "value": [ | |
| "floor(h/2)*floor(w/2)" | |
| ] | |
| }, | |
| "step": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.5/Add", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10", | |
| "/transformer_blocks.4/Add_3_output_0.out10_15" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_2_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.5/Add_output_0.out10_18" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.5/norm2/LayerNormalization", | |
| "type": "SDLayerNorm_bfbfp", | |
| "in_args": [ | |
| "/transformer_blocks.5/Add_output_0.out10_18" | |
| ], | |
| "const_args": [ | |
| "/transformer_blocks.5/norm2/Constant_output_0", | |
| "/transformer_blocks.5/norm2/Constant_1_output_0" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22" | |
| ], | |
| "attrs": { | |
| "axis": { | |
| "type": "int", | |
| "value": [ | |
| "-1" | |
| ] | |
| }, | |
| "epsilon": { | |
| "type": "float", | |
| "value": [ | |
| "9.999999974752427e-07" | |
| ] | |
| }, | |
| "stash_type": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "gamma_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "beta_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfloat16" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.5/Add_2", | |
| "type": "SDGemmGemmMulAdd_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_3_transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_4" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.5/Add_2_output_0.out0_0_22_bfp.out1_19" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_1": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.5/ff/net.0/proj/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.5/Add_2_output_0.out0_0_22_bfp.out1_19" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2086" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_30" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "6144" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "6144" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "nonlinear": { | |
| "type": "str", | |
| "value": [ | |
| "Gelu" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.5/ff/net.2/MatMul", | |
| "type": "SDGemm_bfp", | |
| "in_args": [ | |
| "/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_30" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2087" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_56_bfp.out25_31" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "6144" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "6144", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfp16ebs8", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/transformer_blocks.5/Add_3", | |
| "type": "SDGemmMulAdd_bfpbfbf", | |
| "in_args": [ | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_56_bfp.out25_31", | |
| "/transformer_blocks.5/Add_output_0.out10_18" | |
| ], | |
| "const_args": [ | |
| "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_5_gma" | |
| ], | |
| "out_args": [ | |
| "/transformer_blocks.5/Add_3_output_0.out10_19" | |
| ], | |
| "attrs": { | |
| "input_shape_0": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "1", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_1": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "input_shape_2": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape_0": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16", | |
| "bfp16ebs8" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| }, | |
| { | |
| "name": "/controlnet_blocks.5/MatMul", | |
| "type": "SDGemm", | |
| "in_args": [ | |
| "/transformer_blocks.5/Add_3_output_0.out10_19" | |
| ], | |
| "const_args": [ | |
| "onnx::MatMul_2093" | |
| ], | |
| "out_args": [ | |
| "/controlnet_blocks.5/Add_output_0.out17_3_57" | |
| ], | |
| "attrs": { | |
| "input_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "output_shape": { | |
| "type": "str", | |
| "value": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "1536" | |
| ] | |
| }, | |
| "weight_shape": { | |
| "type": "int", | |
| "value": [ | |
| "1536", | |
| "1536" | |
| ] | |
| }, | |
| "out_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16" | |
| ] | |
| }, | |
| "bias_enable": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| }, | |
| "in_dtypes": { | |
| "type": "str", | |
| "value": [ | |
| "bfloat16", | |
| "bfp16ebs8", | |
| "bfloat16" | |
| ] | |
| }, | |
| "ctrl_packet": { | |
| "type": "int", | |
| "value": [ | |
| "1" | |
| ] | |
| } | |
| } | |
| } | |
| ], | |
| "fused_tensors": { | |
| "in": { | |
| "buffer_size": 15936, | |
| "xrt_arg_id": 0, | |
| "packed_tensors": [ | |
| "hidden_states_nhwc.out5_0_0", | |
| "/pos_embed/Reshape_1_output_0.out_35_1_2", | |
| "controlnet_cond_nhwc.out5_0_1", | |
| "/time_text_embed/Cast_output_0.out17_3_3", | |
| "pooled_projections.out17_3_1", | |
| "encoder_hidden_states.out17_3_0" | |
| ] | |
| }, | |
| "out": { | |
| "buffer_size": 18432, | |
| "xrt_arg_id": 1, | |
| "packed_tensors": [ | |
| "/controlnet_blocks.0/Add_output_0.out17_3_13", | |
| "/controlnet_blocks.1/Add_output_0.out17_3_22", | |
| "/controlnet_blocks.2/Add_output_0.out17_3_31", | |
| "/controlnet_blocks.3/Add_output_0.out17_3_40", | |
| "/controlnet_blocks.4/Add_output_0.out17_3_49", | |
| "/controlnet_blocks.5/Add_output_0.out17_3_57" | |
| ] | |
| }, | |
| "scratch": { | |
| "buffer_size": 322048, | |
| "xrt_arg_id": 2, | |
| "packed_tensors": [ | |
| "/pos_embed/Transpose_output_0.out5_0_0", | |
| "/pos_embed/Add_2_output_0.out_35_1_2", | |
| "/pos_embed_input/Transpose_output_0.out5_0_1", | |
| "/Add_output_0.out_35_1_3", | |
| "/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3", | |
| "/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1", | |
| "/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4", | |
| "/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1", | |
| "/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0", | |
| "/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2", | |
| "/time_text_embed/Add_output_0.out_35_1_4", | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2", | |
| "encoder_hidden_states.out17_3_0_bfp.out25_0", | |
| "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0", | |
| "/context_embedder/Add_output_0.out17_3_0", | |
| "/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0", | |
| "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0", | |
| "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1", | |
| "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_22", | |
| "/transformer_blocks.0/attn/Concat_output_0.out22_0_bfp.out23_0", | |
| "/transformer_blocks.0/attn/Concat_1_output_0.out22_0_bfp.out23_1", | |
| "/transformer_blocks.0/attn/Concat_2_output_0.out22_0_bfp.out23_2", | |
| "/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0", | |
| "/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1", | |
| "/transformer_blocks.0/Add_4_output_0.out10_0", | |
| "/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3", | |
| "/transformer_blocks.0/Add_6_output_0.out0_0_3_bfp.out1_1", | |
| "/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0", | |
| "/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_6", | |
| "/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_12_bfp.out25_8", | |
| "/transformer_blocks.0/Add_7_output_0.out10_1", | |
| "/transformer_blocks.0/Add_output_0.out10_20", | |
| "/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2", | |
| "/transformer_blocks.0/Add_2_output_0.out0_0_2_bfp.out1_21", | |
| "/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_5", | |
| "/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_11_bfp.out25_7", | |
| "/transformer_blocks.0/Add_3_output_0.out10_21", | |
| "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_4_bfp.out15_4", | |
| "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2", | |
| "/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_5_bfp.out15_5", | |
| "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4", | |
| "/transformer_blocks.1/attn/Concat_output_0.out22_1_bfp.out23_3", | |
| "/transformer_blocks.1/attn/Concat_1_output_0.out22_1_bfp.out23_4", | |
| "/transformer_blocks.1/attn/Concat_2_output_0.out22_1_bfp.out23_5", | |
| "/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1", | |
| "/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2", | |
| "/transformer_blocks.1/Add_output_0.out10_2", | |
| "/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6", | |
| "/transformer_blocks.1/Add_2_output_0.out0_0_6_bfp.out1_3", | |
| "/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_10", | |
| "/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_20_bfp.out25_12", | |
| "/transformer_blocks.1/Add_3_output_0.out10_3", | |
| "/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3", | |
| "/transformer_blocks.1/Add_4_output_0.out10_4", | |
| "/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7", | |
| "/transformer_blocks.1/Add_6_output_0.out0_0_7_bfp.out1_5", | |
| "/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_11", | |
| "/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_21_bfp.out25_13", | |
| "/transformer_blocks.1/Add_7_output_0.out10_5", | |
| "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_8_bfp.out15_8", | |
| "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_6", | |
| "/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_9_bfp.out15_9", | |
| "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_8", | |
| "/transformer_blocks.2/attn/Concat_output_0.out22_2_bfp.out23_6", | |
| "/transformer_blocks.2/attn/Concat_1_output_0.out22_2_bfp.out23_7", | |
| "/transformer_blocks.2/attn/Concat_2_output_0.out22_2_bfp.out23_8", | |
| "/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2", | |
| "/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4", | |
| "/transformer_blocks.2/Add_output_0.out10_6", | |
| "/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10", | |
| "/transformer_blocks.2/Add_2_output_0.out0_0_10_bfp.out1_7", | |
| "/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_15", | |
| "/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_29_bfp.out25_17", | |
| "/transformer_blocks.2/Add_3_output_0.out10_7", | |
| "/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5", | |
| "/transformer_blocks.2/Add_4_output_0.out10_8", | |
| "/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11", | |
| "/transformer_blocks.2/Add_6_output_0.out0_0_11_bfp.out1_9", | |
| "/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_16", | |
| "/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_30_bfp.out25_18", | |
| "/transformer_blocks.2/Add_7_output_0.out10_9", | |
| "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_12_bfp.out15_12", | |
| "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_10", | |
| "/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_13_bfp.out15_13", | |
| "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_12", | |
| "/transformer_blocks.3/attn/Concat_output_0.out22_3_bfp.out23_9", | |
| "/transformer_blocks.3/attn/Concat_1_output_0.out22_3_bfp.out23_10", | |
| "/transformer_blocks.3/attn/Concat_2_output_0.out22_3_bfp.out23_11", | |
| "/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3", | |
| "/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6", | |
| "/transformer_blocks.3/Add_output_0.out10_10", | |
| "/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14", | |
| "/transformer_blocks.3/Add_2_output_0.out0_0_14_bfp.out1_11", | |
| "/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_20", | |
| "/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_38_bfp.out25_22", | |
| "/transformer_blocks.3/Add_3_output_0.out10_11", | |
| "/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7", | |
| "/transformer_blocks.3/Add_4_output_0.out10_12", | |
| "/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15", | |
| "/transformer_blocks.3/Add_6_output_0.out0_0_15_bfp.out1_13", | |
| "/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_21", | |
| "/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_39_bfp.out25_23", | |
| "/transformer_blocks.3/Add_7_output_0.out10_13", | |
| "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_16_bfp.out15_16", | |
| "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_14", | |
| "/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_17_bfp.out15_17", | |
| "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_16", | |
| "/transformer_blocks.4/attn/Concat_output_0.out22_4_bfp.out23_12", | |
| "/transformer_blocks.4/attn/Concat_1_output_0.out22_4_bfp.out23_13", | |
| "/transformer_blocks.4/attn/Concat_2_output_0.out22_4_bfp.out23_14", | |
| "/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4", | |
| "/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8", | |
| "/transformer_blocks.4/Add_output_0.out10_14", | |
| "/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18", | |
| "/transformer_blocks.4/Add_2_output_0.out0_0_18_bfp.out1_15", | |
| "/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_25", | |
| "/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_47_bfp.out25_27", | |
| "/transformer_blocks.4/Add_3_output_0.out10_15", | |
| "/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9", | |
| "/transformer_blocks.4/Add_4_output_0.out10_16", | |
| "/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19", | |
| "/transformer_blocks.4/Add_6_output_0.out0_0_19_bfp.out1_17", | |
| "/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_26", | |
| "/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_48_bfp.out25_28", | |
| "/transformer_blocks.4/Add_7_output_0.out10_17", | |
| "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_20_bfp.out15_20", | |
| "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_18", | |
| "/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_21_bfp.out15_21", | |
| "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_20", | |
| "/transformer_blocks.5/attn/Concat_output_0.out22_5_bfp.out23_15", | |
| "/transformer_blocks.5/attn/Concat_1_output_0.out22_5_bfp.out23_16", | |
| "/transformer_blocks.5/attn/Concat_2_output_0.out22_5_bfp.out23_17", | |
| "/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5", | |
| "/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10", | |
| "/transformer_blocks.5/Add_output_0.out10_18", | |
| "/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22", | |
| "/transformer_blocks.5/Add_2_output_0.out0_0_22_bfp.out1_19", | |
| "/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_30", | |
| "/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_56_bfp.out25_31", | |
| "/transformer_blocks.5/Add_3_output_0.out10_19" | |
| ] | |
| }, | |
| "const": { | |
| "buffer_size": 579539968, | |
| "xrt_arg_id": 3, | |
| "packed_tensors": [ | |
| "pos_embed.proj.weight", | |
| "pos_embed_input.proj.weight", | |
| "time_text_embed.timestep_embedder.linear_1.weight_5_1_2", | |
| "/time_text_embed/timestep_embedder/act/Sigmoid.weights2_1", | |
| "time_text_embed.timestep_embedder.linear_2.weight_5_1_3", | |
| "time_text_embed.text_embedder.linear_1.weight_5_1_0", | |
| "/time_text_embed/text_embedder/act_1/Sigmoid.weights2_0", | |
| "time_text_embed.text_embedder.linear_2.weight_5_1_1", | |
| "/transformer_blocks.0/norm1/silu/Sigmoid.weights2_2", | |
| "encoder_hidden_states.out17_3_0_bfp.wts", | |
| "onnx::MatMul_1943", | |
| "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_bfp.wts", | |
| "/transformer_blocks.0/norm1_context/norm/Constant_output_0", | |
| "/transformer_blocks.0/norm1_context/norm/Constant_1_output_0", | |
| "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_1_transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_0", | |
| "/transformer_blocks.0/norm1/norm/Constant_output_0", | |
| "/transformer_blocks.0/norm1/norm/Constant_1_output_0", | |
| "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_4_transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_3", | |
| "onnx::MatMul_1947_onnx::MatMul_1944", | |
| "onnx::MatMul_1948_onnx::MatMul_1945", | |
| "onnx::MatMul_1949_onnx::MatMul_1946", | |
| "onnx::MatMul_1964", | |
| "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_4_gma", | |
| "/transformer_blocks.0/norm2_context/Constant_output_0", | |
| "/transformer_blocks.0/norm2_context/Constant_1_output_0", | |
| "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_3_transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_2", | |
| "onnx::MatMul_1963", | |
| "onnx::MatMul_1967", | |
| "onnx::MatMul_1968", | |
| "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_5_gma", | |
| "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_2_gma", | |
| "/transformer_blocks.0/norm2/Constant_output_0", | |
| "/transformer_blocks.0/norm2/Constant_1_output_0", | |
| "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_1_transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_0", | |
| "onnx::MatMul_1965", | |
| "onnx::MatMul_1966", | |
| "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_5_gma", | |
| "/transformer_blocks.1/norm1/norm/Constant_output_0", | |
| "/transformer_blocks.1/norm1/norm/Constant_1_output_0", | |
| "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_0_transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_1", | |
| "onnx::MatMul_2088", | |
| "/transformer_blocks.1/norm1_context/norm/Constant_output_0", | |
| "/transformer_blocks.1/norm1_context/norm/Constant_1_output_0", | |
| "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_0_transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_1", | |
| "onnx::MatMul_1972_onnx::MatMul_1969", | |
| "onnx::MatMul_1973_onnx::MatMul_1970", | |
| "onnx::MatMul_1974_onnx::MatMul_1971", | |
| "onnx::MatMul_1988", | |
| "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_2_gma", | |
| "/transformer_blocks.1/norm2/Constant_output_0", | |
| "/transformer_blocks.1/norm2/Constant_1_output_0", | |
| "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_3_transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_4", | |
| "onnx::MatMul_1990", | |
| "onnx::MatMul_1991", | |
| "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_5_gma", | |
| "onnx::MatMul_2089", | |
| "onnx::MatMul_1989", | |
| "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_2_gma", | |
| "/transformer_blocks.1/norm2_context/Constant_output_0", | |
| "/transformer_blocks.1/norm2_context/Constant_1_output_0", | |
| "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_3_transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_4", | |
| "onnx::MatMul_1992", | |
| "onnx::MatMul_1993", | |
| "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_5_gma", | |
| "/transformer_blocks.2/norm1/norm/Constant_output_0", | |
| "/transformer_blocks.2/norm1/norm/Constant_1_output_0", | |
| "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_0_transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_1", | |
| "/transformer_blocks.2/norm1_context/norm/Constant_output_0", | |
| "/transformer_blocks.2/norm1_context/norm/Constant_1_output_0", | |
| "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_0_transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_1", | |
| "onnx::MatMul_1997_onnx::MatMul_1994", | |
| "onnx::MatMul_1998_onnx::MatMul_1995", | |
| "onnx::MatMul_1999_onnx::MatMul_1996", | |
| "onnx::MatMul_2013", | |
| "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_2_gma", | |
| "/transformer_blocks.2/norm2/Constant_output_0", | |
| "/transformer_blocks.2/norm2/Constant_1_output_0", | |
| "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_3_transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_4", | |
| "onnx::MatMul_2015", | |
| "onnx::MatMul_2016", | |
| "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_5_gma", | |
| "onnx::MatMul_2090", | |
| "onnx::MatMul_2014", | |
| "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_2_gma", | |
| "/transformer_blocks.2/norm2_context/Constant_output_0", | |
| "/transformer_blocks.2/norm2_context/Constant_1_output_0", | |
| "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_3_transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_4", | |
| "onnx::MatMul_2017", | |
| "onnx::MatMul_2018", | |
| "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_5_gma", | |
| "/transformer_blocks.3/norm1/norm/Constant_output_0", | |
| "/transformer_blocks.3/norm1/norm/Constant_1_output_0", | |
| "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_0_transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_1", | |
| "/transformer_blocks.3/norm1_context/norm/Constant_output_0", | |
| "/transformer_blocks.3/norm1_context/norm/Constant_1_output_0", | |
| "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_0_transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_1", | |
| "onnx::MatMul_2022_onnx::MatMul_2019", | |
| "onnx::MatMul_2023_onnx::MatMul_2020", | |
| "onnx::MatMul_2024_onnx::MatMul_2021", | |
| "onnx::MatMul_2038", | |
| "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_2_gma", | |
| "/transformer_blocks.3/norm2/Constant_output_0", | |
| "/transformer_blocks.3/norm2/Constant_1_output_0", | |
| "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_3_transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_4", | |
| "onnx::MatMul_2040", | |
| "onnx::MatMul_2041", | |
| "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_5_gma", | |
| "onnx::MatMul_2091", | |
| "onnx::MatMul_2039", | |
| "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_2_gma", | |
| "/transformer_blocks.3/norm2_context/Constant_output_0", | |
| "/transformer_blocks.3/norm2_context/Constant_1_output_0", | |
| "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_3_transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_4", | |
| "onnx::MatMul_2042", | |
| "onnx::MatMul_2043", | |
| "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_5_gma", | |
| "/transformer_blocks.4/norm1/norm/Constant_output_0", | |
| "/transformer_blocks.4/norm1/norm/Constant_1_output_0", | |
| "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_0_transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_1", | |
| "/transformer_blocks.4/norm1_context/norm/Constant_output_0", | |
| "/transformer_blocks.4/norm1_context/norm/Constant_1_output_0", | |
| "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_0_transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_1", | |
| "onnx::MatMul_2047_onnx::MatMul_2044", | |
| "onnx::MatMul_2048_onnx::MatMul_2045", | |
| "onnx::MatMul_2049_onnx::MatMul_2046", | |
| "onnx::MatMul_2063", | |
| "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_2_gma", | |
| "/transformer_blocks.4/norm2/Constant_output_0", | |
| "/transformer_blocks.4/norm2/Constant_1_output_0", | |
| "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_3_transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_4", | |
| "onnx::MatMul_2065", | |
| "onnx::MatMul_2066", | |
| "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_5_gma", | |
| "onnx::MatMul_2092", | |
| "onnx::MatMul_2064", | |
| "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_2_gma", | |
| "/transformer_blocks.4/norm2_context/Constant_output_0", | |
| "/transformer_blocks.4/norm2_context/Constant_1_output_0", | |
| "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_3_transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_4", | |
| "onnx::MatMul_2067", | |
| "onnx::MatMul_2068", | |
| "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_5_gma", | |
| "/transformer_blocks.5/norm1/norm/Constant_output_0", | |
| "/transformer_blocks.5/norm1/norm/Constant_1_output_0", | |
| "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_0_transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_1", | |
| "/transformer_blocks.5/norm1_context/norm/Constant_output_0", | |
| "/transformer_blocks.5/norm1_context/norm/Constant_1_output_0", | |
| "transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_0_transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_1", | |
| "onnx::MatMul_2072_onnx::MatMul_2069", | |
| "onnx::MatMul_2073_onnx::MatMul_2070", | |
| "onnx::MatMul_2074_onnx::MatMul_2071", | |
| "onnx::MatMul_2085", | |
| "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_2_gma", | |
| "/transformer_blocks.5/norm2/Constant_output_0", | |
| "/transformer_blocks.5/norm2/Constant_1_output_0", | |
| "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_3_transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_4", | |
| "onnx::MatMul_2086", | |
| "onnx::MatMul_2087", | |
| "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_5_gma", | |
| "onnx::MatMul_2093" | |
| ] | |
| }, | |
| "super_instr": { | |
| "buffer_size": 0, | |
| "xrt_arg_id": 4, | |
| "packed_tensors": [] | |
| } | |
| }, | |
| "tensor_map": { | |
| "hidden_states_nhwc.out5_0_0": { | |
| "packed_buffer_label": "in", | |
| "xrt_arg_id": 0, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1, | |
| 16 | |
| ], | |
| "size_in_bytes": 32, | |
| "op_tensor_size": 32, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "w", | |
| "h", | |
| "False" | |
| ], | |
| "offset": 0 | |
| }, | |
| "/pos_embed/Reshape_1_output_0.out_35_1_2": { | |
| "packed_buffer_label": "in", | |
| "xrt_arg_id": 0, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "False", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 32 | |
| }, | |
| "controlnet_cond_nhwc.out5_0_1": { | |
| "packed_buffer_label": "in", | |
| "xrt_arg_id": 0, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1, | |
| 16 | |
| ], | |
| "size_in_bytes": 32, | |
| "op_tensor_size": 32, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "w", | |
| "h", | |
| "False" | |
| ], | |
| "offset": 3104 | |
| }, | |
| "/time_text_embed/Cast_output_0.out17_3_3": { | |
| "packed_buffer_label": "in", | |
| "xrt_arg_id": 0, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 256 | |
| ], | |
| "size_in_bytes": 512, | |
| "op_tensor_size": 512, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False" | |
| ], | |
| "offset": 3136 | |
| }, | |
| "pooled_projections.out17_3_1": { | |
| "packed_buffer_label": "in", | |
| "xrt_arg_id": 0, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "size_in_bytes": 4096, | |
| "op_tensor_size": 4096, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False" | |
| ], | |
| "offset": 3648 | |
| }, | |
| "encoder_hidden_states.out17_3_0": { | |
| "packed_buffer_label": "in", | |
| "xrt_arg_id": 0, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 4096 | |
| ], | |
| "size_in_bytes": 8192, | |
| "op_tensor_size": 8192, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 7744 | |
| }, | |
| "/controlnet_blocks.0/Add_output_0.out17_3_13": { | |
| "packed_buffer_label": "out", | |
| "xrt_arg_id": 1, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 0 | |
| }, | |
| "/controlnet_blocks.1/Add_output_0.out17_3_22": { | |
| "packed_buffer_label": "out", | |
| "xrt_arg_id": 1, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 3072 | |
| }, | |
| "/controlnet_blocks.2/Add_output_0.out17_3_31": { | |
| "packed_buffer_label": "out", | |
| "xrt_arg_id": 1, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 6144 | |
| }, | |
| "/controlnet_blocks.3/Add_output_0.out17_3_40": { | |
| "packed_buffer_label": "out", | |
| "xrt_arg_id": 1, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 9216 | |
| }, | |
| "/controlnet_blocks.4/Add_output_0.out17_3_49": { | |
| "packed_buffer_label": "out", | |
| "xrt_arg_id": 1, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 12288 | |
| }, | |
| "/controlnet_blocks.5/Add_output_0.out17_3_57": { | |
| "packed_buffer_label": "out", | |
| "xrt_arg_id": 1, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 15360 | |
| }, | |
| "/pos_embed/Transpose_output_0.out5_0_0": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 0 | |
| }, | |
| "/pos_embed/Add_2_output_0.out_35_1_2": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 3072 | |
| }, | |
| "/pos_embed_input/Transpose_output_0.out5_0_1": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 6144 | |
| }, | |
| "/Add_output_0.out_35_1_3": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 9216 | |
| }, | |
| "/time_text_embed/timestep_embedder/linear_1/Gemm_output_0.out17_3_3": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False" | |
| ], | |
| "offset": 12288 | |
| }, | |
| "/time_text_embed/timestep_embedder/act/Sigmoid_output_0.out2_1": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False" | |
| ], | |
| "offset": 15360 | |
| }, | |
| "/time_text_embed/timestep_embedder/linear_2/Gemm_output_0.out17_3_4": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False" | |
| ], | |
| "offset": 18432 | |
| }, | |
| "/time_text_embed/text_embedder/linear_1/Gemm_output_0.out17_3_1": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False" | |
| ], | |
| "offset": 21504 | |
| }, | |
| "/time_text_embed/text_embedder/act_1/Sigmoid_output_0.out2_0": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False" | |
| ], | |
| "offset": 24576 | |
| }, | |
| "/time_text_embed/text_embedder/linear_2/Gemm_output_0.out17_3_2": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False" | |
| ], | |
| "offset": 27648 | |
| }, | |
| "/time_text_embed/Add_output_0.out_35_1_4": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False" | |
| ], | |
| "offset": 30720 | |
| }, | |
| "/transformer_blocks.0/norm1/silu/Sigmoid_output_0.out2_2": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False" | |
| ], | |
| "offset": 33792 | |
| }, | |
| "encoder_hidden_states.out17_3_0_bfp.out25_0": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 4096 | |
| ], | |
| "size_in_bytes": 4096, | |
| "op_tensor_size": 4096, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 36864 | |
| }, | |
| "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 40960 | |
| }, | |
| "/context_embedder/Add_output_0.out17_3_0": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 42496 | |
| }, | |
| "/transformer_blocks.0/norm1_context/norm/LayerNormalization_output_0.out14_0_bfp.out15_0": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 45568 | |
| }, | |
| "/transformer_blocks.0/norm1_context/Add_2_output_0.out0_0_0_bfp.out1_0": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 47104 | |
| }, | |
| "/transformer_blocks.0/norm1/norm/LayerNormalization_output_0.out14_1_bfp.out15_1": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 48640 | |
| }, | |
| "/transformer_blocks.0/norm1/Add_2_output_0.out0_0_1_bfp.out1_22": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 50176 | |
| }, | |
| "/transformer_blocks.0/attn/Concat_output_0.out22_0_bfp.out23_0": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 24, | |
| 1, | |
| 64 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 51712 | |
| }, | |
| "/transformer_blocks.0/attn/Concat_1_output_0.out22_0_bfp.out23_1": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 24, | |
| 1, | |
| 64 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 53248 | |
| }, | |
| "/transformer_blocks.0/attn/Concat_2_output_0.out22_0_bfp.out23_2": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 24, | |
| 1, | |
| 64 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 54784 | |
| }, | |
| "/transformer_blocks.0/attn/Reshape_3_output_0.out22_0_bfp.out27_0_0": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 56320 | |
| }, | |
| "/transformer_blocks.0/attn/to_add_out/Add_output_0.out6_1_1_bfp.out7_1": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 57856 | |
| }, | |
| "/transformer_blocks.0/Add_4_output_0.out10_0": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 59392 | |
| }, | |
| "/transformer_blocks.0/norm2_context/LayerNormalization_output_0.out14_3_bfp.out15_3": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 62464 | |
| }, | |
| "/transformer_blocks.0/Add_6_output_0.out0_0_3_bfp.out1_1": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 64000 | |
| }, | |
| "/transformer_blocks.0/attn/to_out.0/Add_output_0.out6_1_0_bfp.out7_0": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 65536 | |
| }, | |
| "/transformer_blocks.0/ff_context/net.0/Mul_5_output_0.out17_2_1_bfp.out25_6": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 6144 | |
| ], | |
| "size_in_bytes": 6144, | |
| "op_tensor_size": 6144, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 67072 | |
| }, | |
| "/transformer_blocks.0/ff_context/net.2/Add_output_0.out17_3_12_bfp.out25_8": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 73216 | |
| }, | |
| "/transformer_blocks.0/Add_7_output_0.out10_1": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 74752 | |
| }, | |
| "/transformer_blocks.0/Add_output_0.out10_20": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 77824 | |
| }, | |
| "/transformer_blocks.0/norm2/LayerNormalization_output_0.out14_2_bfp.out15_2": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 80896 | |
| }, | |
| "/transformer_blocks.0/Add_2_output_0.out0_0_2_bfp.out1_21": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 82432 | |
| }, | |
| "/transformer_blocks.0/ff/net.0/Mul_5_output_0.out17_2_0_bfp.out25_5": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 6144 | |
| ], | |
| "size_in_bytes": 6144, | |
| "op_tensor_size": 6144, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 83968 | |
| }, | |
| "/transformer_blocks.0/ff/net.2/Add_output_0.out17_3_11_bfp.out25_7": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 90112 | |
| }, | |
| "/transformer_blocks.0/Add_3_output_0.out10_21": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 91648 | |
| }, | |
| "/transformer_blocks.1/norm1/norm/LayerNormalization_output_0.out14_4_bfp.out15_4": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 94720 | |
| }, | |
| "/transformer_blocks.1/norm1/Add_2_output_0.out0_0_4_bfp.out1_2": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 96256 | |
| }, | |
| "/transformer_blocks.1/norm1_context/norm/LayerNormalization_output_0.out14_5_bfp.out15_5": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 97792 | |
| }, | |
| "/transformer_blocks.1/norm1_context/Add_2_output_0.out0_0_5_bfp.out1_4": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 99328 | |
| }, | |
| "/transformer_blocks.1/attn/Concat_output_0.out22_1_bfp.out23_3": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 24, | |
| 1, | |
| 64 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 100864 | |
| }, | |
| "/transformer_blocks.1/attn/Concat_1_output_0.out22_1_bfp.out23_4": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 24, | |
| 1, | |
| 64 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 102400 | |
| }, | |
| "/transformer_blocks.1/attn/Concat_2_output_0.out22_1_bfp.out23_5": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 24, | |
| 1, | |
| 64 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 103936 | |
| }, | |
| "/transformer_blocks.1/attn/Reshape_3_output_0.out22_1_bfp.out27_0_1": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 105472 | |
| }, | |
| "/transformer_blocks.1/attn/to_out.0/Add_output_0.out6_1_2_bfp.out7_2": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 107008 | |
| }, | |
| "/transformer_blocks.1/Add_output_0.out10_2": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 108544 | |
| }, | |
| "/transformer_blocks.1/norm2/LayerNormalization_output_0.out14_6_bfp.out15_6": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 111616 | |
| }, | |
| "/transformer_blocks.1/Add_2_output_0.out0_0_6_bfp.out1_3": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 113152 | |
| }, | |
| "/transformer_blocks.1/ff/net.0/Mul_5_output_0.out17_2_2_bfp.out25_10": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 6144 | |
| ], | |
| "size_in_bytes": 6144, | |
| "op_tensor_size": 6144, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 114688 | |
| }, | |
| "/transformer_blocks.1/ff/net.2/Add_output_0.out17_3_20_bfp.out25_12": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 120832 | |
| }, | |
| "/transformer_blocks.1/Add_3_output_0.out10_3": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 122368 | |
| }, | |
| "/transformer_blocks.1/attn/to_add_out/Add_output_0.out6_1_3_bfp.out7_3": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 125440 | |
| }, | |
| "/transformer_blocks.1/Add_4_output_0.out10_4": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 126976 | |
| }, | |
| "/transformer_blocks.1/norm2_context/LayerNormalization_output_0.out14_7_bfp.out15_7": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 130048 | |
| }, | |
| "/transformer_blocks.1/Add_6_output_0.out0_0_7_bfp.out1_5": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 131584 | |
| }, | |
| "/transformer_blocks.1/ff_context/net.0/Mul_5_output_0.out17_2_3_bfp.out25_11": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 6144 | |
| ], | |
| "size_in_bytes": 6144, | |
| "op_tensor_size": 6144, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 133120 | |
| }, | |
| "/transformer_blocks.1/ff_context/net.2/Add_output_0.out17_3_21_bfp.out25_13": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 139264 | |
| }, | |
| "/transformer_blocks.1/Add_7_output_0.out10_5": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 140800 | |
| }, | |
| "/transformer_blocks.2/norm1/norm/LayerNormalization_output_0.out14_8_bfp.out15_8": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 143872 | |
| }, | |
| "/transformer_blocks.2/norm1/Add_2_output_0.out0_0_8_bfp.out1_6": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 145408 | |
| }, | |
| "/transformer_blocks.2/norm1_context/norm/LayerNormalization_output_0.out14_9_bfp.out15_9": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 146944 | |
| }, | |
| "/transformer_blocks.2/norm1_context/Add_2_output_0.out0_0_9_bfp.out1_8": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 148480 | |
| }, | |
| "/transformer_blocks.2/attn/Concat_output_0.out22_2_bfp.out23_6": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 24, | |
| 1, | |
| 64 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 150016 | |
| }, | |
| "/transformer_blocks.2/attn/Concat_1_output_0.out22_2_bfp.out23_7": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 24, | |
| 1, | |
| 64 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 151552 | |
| }, | |
| "/transformer_blocks.2/attn/Concat_2_output_0.out22_2_bfp.out23_8": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 24, | |
| 1, | |
| 64 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 153088 | |
| }, | |
| "/transformer_blocks.2/attn/Reshape_3_output_0.out22_2_bfp.out27_0_2": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 154624 | |
| }, | |
| "/transformer_blocks.2/attn/to_out.0/Add_output_0.out6_1_4_bfp.out7_4": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 156160 | |
| }, | |
| "/transformer_blocks.2/Add_output_0.out10_6": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 157696 | |
| }, | |
| "/transformer_blocks.2/norm2/LayerNormalization_output_0.out14_10_bfp.out15_10": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 160768 | |
| }, | |
| "/transformer_blocks.2/Add_2_output_0.out0_0_10_bfp.out1_7": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 162304 | |
| }, | |
| "/transformer_blocks.2/ff/net.0/Mul_5_output_0.out17_2_4_bfp.out25_15": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 6144 | |
| ], | |
| "size_in_bytes": 6144, | |
| "op_tensor_size": 6144, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 163840 | |
| }, | |
| "/transformer_blocks.2/ff/net.2/Add_output_0.out17_3_29_bfp.out25_17": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 169984 | |
| }, | |
| "/transformer_blocks.2/Add_3_output_0.out10_7": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 171520 | |
| }, | |
| "/transformer_blocks.2/attn/to_add_out/Add_output_0.out6_1_5_bfp.out7_5": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 174592 | |
| }, | |
| "/transformer_blocks.2/Add_4_output_0.out10_8": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 176128 | |
| }, | |
| "/transformer_blocks.2/norm2_context/LayerNormalization_output_0.out14_11_bfp.out15_11": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 179200 | |
| }, | |
| "/transformer_blocks.2/Add_6_output_0.out0_0_11_bfp.out1_9": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 180736 | |
| }, | |
| "/transformer_blocks.2/ff_context/net.0/Mul_5_output_0.out17_2_5_bfp.out25_16": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 6144 | |
| ], | |
| "size_in_bytes": 6144, | |
| "op_tensor_size": 6144, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 182272 | |
| }, | |
| "/transformer_blocks.2/ff_context/net.2/Add_output_0.out17_3_30_bfp.out25_18": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 188416 | |
| }, | |
| "/transformer_blocks.2/Add_7_output_0.out10_9": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 189952 | |
| }, | |
| "/transformer_blocks.3/norm1/norm/LayerNormalization_output_0.out14_12_bfp.out15_12": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 193024 | |
| }, | |
| "/transformer_blocks.3/norm1/Add_2_output_0.out0_0_12_bfp.out1_10": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 194560 | |
| }, | |
| "/transformer_blocks.3/norm1_context/norm/LayerNormalization_output_0.out14_13_bfp.out15_13": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 196096 | |
| }, | |
| "/transformer_blocks.3/norm1_context/Add_2_output_0.out0_0_13_bfp.out1_12": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 197632 | |
| }, | |
| "/transformer_blocks.3/attn/Concat_output_0.out22_3_bfp.out23_9": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 24, | |
| 1, | |
| 64 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 199168 | |
| }, | |
| "/transformer_blocks.3/attn/Concat_1_output_0.out22_3_bfp.out23_10": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 24, | |
| 1, | |
| 64 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 200704 | |
| }, | |
| "/transformer_blocks.3/attn/Concat_2_output_0.out22_3_bfp.out23_11": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 24, | |
| 1, | |
| 64 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 202240 | |
| }, | |
| "/transformer_blocks.3/attn/Reshape_3_output_0.out22_3_bfp.out27_0_3": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 203776 | |
| }, | |
| "/transformer_blocks.3/attn/to_out.0/Add_output_0.out6_1_6_bfp.out7_6": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 205312 | |
| }, | |
| "/transformer_blocks.3/Add_output_0.out10_10": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 206848 | |
| }, | |
| "/transformer_blocks.3/norm2/LayerNormalization_output_0.out14_14_bfp.out15_14": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 209920 | |
| }, | |
| "/transformer_blocks.3/Add_2_output_0.out0_0_14_bfp.out1_11": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 211456 | |
| }, | |
| "/transformer_blocks.3/ff/net.0/Mul_5_output_0.out17_2_6_bfp.out25_20": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 6144 | |
| ], | |
| "size_in_bytes": 6144, | |
| "op_tensor_size": 6144, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 212992 | |
| }, | |
| "/transformer_blocks.3/ff/net.2/Add_output_0.out17_3_38_bfp.out25_22": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 219136 | |
| }, | |
| "/transformer_blocks.3/Add_3_output_0.out10_11": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 220672 | |
| }, | |
| "/transformer_blocks.3/attn/to_add_out/Add_output_0.out6_1_7_bfp.out7_7": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 223744 | |
| }, | |
| "/transformer_blocks.3/Add_4_output_0.out10_12": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 225280 | |
| }, | |
| "/transformer_blocks.3/norm2_context/LayerNormalization_output_0.out14_15_bfp.out15_15": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 228352 | |
| }, | |
| "/transformer_blocks.3/Add_6_output_0.out0_0_15_bfp.out1_13": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 229888 | |
| }, | |
| "/transformer_blocks.3/ff_context/net.0/Mul_5_output_0.out17_2_7_bfp.out25_21": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 6144 | |
| ], | |
| "size_in_bytes": 6144, | |
| "op_tensor_size": 6144, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 231424 | |
| }, | |
| "/transformer_blocks.3/ff_context/net.2/Add_output_0.out17_3_39_bfp.out25_23": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 237568 | |
| }, | |
| "/transformer_blocks.3/Add_7_output_0.out10_13": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 239104 | |
| }, | |
| "/transformer_blocks.4/norm1/norm/LayerNormalization_output_0.out14_16_bfp.out15_16": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 242176 | |
| }, | |
| "/transformer_blocks.4/norm1/Add_2_output_0.out0_0_16_bfp.out1_14": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 243712 | |
| }, | |
| "/transformer_blocks.4/norm1_context/norm/LayerNormalization_output_0.out14_17_bfp.out15_17": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 245248 | |
| }, | |
| "/transformer_blocks.4/norm1_context/Add_2_output_0.out0_0_17_bfp.out1_16": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 246784 | |
| }, | |
| "/transformer_blocks.4/attn/Concat_output_0.out22_4_bfp.out23_12": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 24, | |
| 1, | |
| 64 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 248320 | |
| }, | |
| "/transformer_blocks.4/attn/Concat_1_output_0.out22_4_bfp.out23_13": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 24, | |
| 1, | |
| 64 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 249856 | |
| }, | |
| "/transformer_blocks.4/attn/Concat_2_output_0.out22_4_bfp.out23_14": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 24, | |
| 1, | |
| 64 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 251392 | |
| }, | |
| "/transformer_blocks.4/attn/Reshape_3_output_0.out22_4_bfp.out27_0_4": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 252928 | |
| }, | |
| "/transformer_blocks.4/attn/to_out.0/Add_output_0.out6_1_8_bfp.out7_8": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 254464 | |
| }, | |
| "/transformer_blocks.4/Add_output_0.out10_14": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 256000 | |
| }, | |
| "/transformer_blocks.4/norm2/LayerNormalization_output_0.out14_18_bfp.out15_18": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 259072 | |
| }, | |
| "/transformer_blocks.4/Add_2_output_0.out0_0_18_bfp.out1_15": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 260608 | |
| }, | |
| "/transformer_blocks.4/ff/net.0/Mul_5_output_0.out17_2_8_bfp.out25_25": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 6144 | |
| ], | |
| "size_in_bytes": 6144, | |
| "op_tensor_size": 6144, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 262144 | |
| }, | |
| "/transformer_blocks.4/ff/net.2/Add_output_0.out17_3_47_bfp.out25_27": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 268288 | |
| }, | |
| "/transformer_blocks.4/Add_3_output_0.out10_15": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 269824 | |
| }, | |
| "/transformer_blocks.4/attn/to_add_out/Add_output_0.out6_1_9_bfp.out7_9": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 272896 | |
| }, | |
| "/transformer_blocks.4/Add_4_output_0.out10_16": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 274432 | |
| }, | |
| "/transformer_blocks.4/norm2_context/LayerNormalization_output_0.out14_19_bfp.out15_19": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 277504 | |
| }, | |
| "/transformer_blocks.4/Add_6_output_0.out0_0_19_bfp.out1_17": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 279040 | |
| }, | |
| "/transformer_blocks.4/ff_context/net.0/Mul_5_output_0.out17_2_9_bfp.out25_26": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 6144 | |
| ], | |
| "size_in_bytes": 6144, | |
| "op_tensor_size": 6144, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 280576 | |
| }, | |
| "/transformer_blocks.4/ff_context/net.2/Add_output_0.out17_3_48_bfp.out25_28": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 286720 | |
| }, | |
| "/transformer_blocks.4/Add_7_output_0.out10_17": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 288256 | |
| }, | |
| "/transformer_blocks.5/norm1/norm/LayerNormalization_output_0.out14_20_bfp.out15_20": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 291328 | |
| }, | |
| "/transformer_blocks.5/norm1/Add_2_output_0.out0_0_20_bfp.out1_18": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 292864 | |
| }, | |
| "/transformer_blocks.5/norm1_context/norm/LayerNormalization_output_0.out14_21_bfp.out15_21": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 294400 | |
| }, | |
| "/transformer_blocks.5/norm1_context/Add_2_output_0.out0_0_21_bfp.out1_20": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length", | |
| "False" | |
| ], | |
| "offset": 295936 | |
| }, | |
| "/transformer_blocks.5/attn/Concat_output_0.out22_5_bfp.out23_15": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 24, | |
| 1, | |
| 64 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 297472 | |
| }, | |
| "/transformer_blocks.5/attn/Concat_1_output_0.out22_5_bfp.out23_16": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 24, | |
| 1, | |
| 64 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 299008 | |
| }, | |
| "/transformer_blocks.5/attn/Concat_2_output_0.out22_5_bfp.out23_17": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 24, | |
| 1, | |
| 64 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "False", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 300544 | |
| }, | |
| "/transformer_blocks.5/attn/Reshape_3_output_0.out22_5_bfp.out27_0_5": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "max_length + floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 302080 | |
| }, | |
| "/transformer_blocks.5/attn/to_out.0/Add_output_0.out6_1_10_bfp.out7_10": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 303616 | |
| }, | |
| "/transformer_blocks.5/Add_output_0.out10_18": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 305152 | |
| }, | |
| "/transformer_blocks.5/norm2/LayerNormalization_output_0.out14_22_bfp.out15_22": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 308224 | |
| }, | |
| "/transformer_blocks.5/Add_2_output_0.out0_0_22_bfp.out1_19": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 309760 | |
| }, | |
| "/transformer_blocks.5/ff/net.0/Mul_5_output_0.out17_2_10_bfp.out25_30": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 6144 | |
| ], | |
| "size_in_bytes": 6144, | |
| "op_tensor_size": 6144, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 311296 | |
| }, | |
| "/transformer_blocks.5/ff/net.2/Add_output_0.out17_3_56_bfp.out25_31": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 1536, | |
| "op_tensor_size": 1536, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 317440 | |
| }, | |
| "/transformer_blocks.5/Add_3_output_0.out10_19": { | |
| "packed_buffer_label": "scratch", | |
| "xrt_arg_id": 2, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1, | |
| 1, | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "dynamic_shapes": [ | |
| "batch_size", | |
| "floor(h/2)*floor(w/2)", | |
| "False" | |
| ], | |
| "offset": 318976 | |
| }, | |
| "pos_embed.proj.weight": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 135168 | |
| ], | |
| "size_in_bytes": 135168, | |
| "op_tensor_size": 135168, | |
| "offset": 0, | |
| "file_name": "cache/pos_embedprojConv_0.const", | |
| "file_size": 135168 | |
| }, | |
| "pos_embed_input.proj.weight": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 135168 | |
| ], | |
| "size_in_bytes": 135168, | |
| "op_tensor_size": 135168, | |
| "offset": 135168, | |
| "file_name": "cache/pos_embedprojConv_1.const", | |
| "file_size": 135168 | |
| }, | |
| "time_text_embed.timestep_embedder.linear_1.weight_5_1_2": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 448512 | |
| ], | |
| "size_in_bytes": 448512, | |
| "op_tensor_size": 448512, | |
| "offset": 270336, | |
| "file_name": "cache/pos_embedprojConv_2.const", | |
| "file_size": 448512 | |
| }, | |
| "/time_text_embed/timestep_embedder/act/Sigmoid.weights2_1": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 128 | |
| ], | |
| "size_in_bytes": 256, | |
| "op_tensor_size": 256, | |
| "offset": 718848, | |
| "file_name": "cache/pos_embedprojConv_3.const", | |
| "file_size": 256 | |
| }, | |
| "time_text_embed.timestep_embedder.linear_2.weight_5_1_3": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 719104, | |
| "file_name": "cache/pos_embedprojConv_4.const", | |
| "file_size": 2691072 | |
| }, | |
| "time_text_embed.text_embedder.linear_1.weight_5_1_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 3588096 | |
| ], | |
| "size_in_bytes": 3588096, | |
| "op_tensor_size": 3588096, | |
| "offset": 3410176, | |
| "file_name": "cache/pos_embedprojConv_5.const", | |
| "file_size": 3588096 | |
| }, | |
| "/time_text_embed/text_embedder/act_1/Sigmoid.weights2_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 128 | |
| ], | |
| "size_in_bytes": 256, | |
| "op_tensor_size": 256, | |
| "offset": 6998272, | |
| "file_name": "cache/pos_embedprojConv_6.const", | |
| "file_size": 256 | |
| }, | |
| "time_text_embed.text_embedder.linear_2.weight_5_1_1": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 6998528, | |
| "file_name": "cache/pos_embedprojConv_7.const", | |
| "file_size": 2691072 | |
| }, | |
| "/transformer_blocks.0/norm1/silu/Sigmoid.weights2_2": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 128 | |
| ], | |
| "size_in_bytes": 256, | |
| "op_tensor_size": 256, | |
| "offset": 9689600, | |
| "file_name": "cache/pos_embedprojConv_8.const", | |
| "file_size": 256 | |
| }, | |
| "encoder_hidden_states.out17_3_0_bfp.wts": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 64 | |
| ], | |
| "size_in_bytes": 128, | |
| "op_tensor_size": 128, | |
| "offset": 9689856, | |
| "file_name": "cache/pos_embedprojConv_9.const", | |
| "file_size": 128 | |
| }, | |
| "onnx::MatMul_1943": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 7274496 | |
| ], | |
| "size_in_bytes": 7274496, | |
| "op_tensor_size": 7274496, | |
| "offset": 9689984, | |
| "file_name": "cache/pos_embedprojConv_10.const", | |
| "file_size": 7274496 | |
| }, | |
| "/context_embedder/Add_output_0.out17_3_0_bfp.out25_0_bfp.wts": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 64 | |
| ], | |
| "size_in_bytes": 128, | |
| "op_tensor_size": 128, | |
| "offset": 16964480, | |
| "file_name": "cache/pos_embedprojConv_11.const", | |
| "file_size": 128 | |
| }, | |
| "/transformer_blocks.0/norm1_context/norm/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 16964608, | |
| "file_name": "cache/pos_embedprojConv_12.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.0/norm1_context/norm/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 16967680, | |
| "file_name": "cache/pos_embedprojConv_13.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_1_transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 16970752, | |
| "file_name": "cache/pos_embedprojConv_14.const", | |
| "file_size": 5382144 | |
| }, | |
| "/transformer_blocks.0/norm1/norm/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 22352896, | |
| "file_name": "cache/pos_embedprojConv_15.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.0/norm1/norm/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 22355968, | |
| "file_name": "cache/pos_embedprojConv_16.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_4_transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_3": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 22359040, | |
| "file_name": "cache/pos_embedprojConv_17.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_1947_onnx::MatMul_1944": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 27741184, | |
| "file_name": "cache/pos_embedprojConv_18.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_1948_onnx::MatMul_1945": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 33123328, | |
| "file_name": "cache/pos_embedprojConv_19.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_1949_onnx::MatMul_1946": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 38505472, | |
| "file_name": "cache/pos_embedprojConv_20.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_1964": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 43887616, | |
| "file_name": "cache/pos_embedprojConv_21.const", | |
| "file_size": 2691072 | |
| }, | |
| "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_4_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 46578688, | |
| "file_name": "cache/pos_embedprojConv_22.const", | |
| "file_size": 2691072 | |
| }, | |
| "/transformer_blocks.0/norm2_context/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 49269760, | |
| "file_name": "cache/pos_embedprojConv_23.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.0/norm2_context/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 49272832, | |
| "file_name": "cache/pos_embedprojConv_24.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_3_transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_2": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 49275904, | |
| "file_name": "cache/pos_embedprojConv_25.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_1963": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 54658048, | |
| "file_name": "cache/pos_embedprojConv_26.const", | |
| "file_size": 2691072 | |
| }, | |
| "onnx::MatMul_1967": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 57349120, | |
| "file_name": "cache/pos_embedprojConv_27.const", | |
| "file_size": 10764288 | |
| }, | |
| "onnx::MatMul_1968": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 68113408, | |
| "file_name": "cache/pos_embedprojConv_28.const", | |
| "file_size": 10764288 | |
| }, | |
| "transformer_blocks.0.norm1_context.linear.weight_5_1_5_27_1_5_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 78877696, | |
| "file_name": "cache/pos_embedprojConv_29.const", | |
| "file_size": 2691072 | |
| }, | |
| "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_2_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 81568768, | |
| "file_name": "cache/pos_embedprojConv_30.const", | |
| "file_size": 2691072 | |
| }, | |
| "/transformer_blocks.0/norm2/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 84259840, | |
| "file_name": "cache/pos_embedprojConv_31.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.0/norm2/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 84262912, | |
| "file_name": "cache/pos_embedprojConv_32.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_1_transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 84265984, | |
| "file_name": "cache/pos_embedprojConv_33.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_1965": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 89648128, | |
| "file_name": "cache/pos_embedprojConv_34.const", | |
| "file_size": 10764288 | |
| }, | |
| "onnx::MatMul_1966": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 100412416, | |
| "file_name": "cache/pos_embedprojConv_35.const", | |
| "file_size": 10764288 | |
| }, | |
| "transformer_blocks.0.norm1.linear.weight_5_1_4_27_0_5_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 111176704, | |
| "file_name": "cache/pos_embedprojConv_36.const", | |
| "file_size": 2691072 | |
| }, | |
| "/transformer_blocks.1/norm1/norm/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 113867776, | |
| "file_name": "cache/pos_embedprojConv_37.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.1/norm1/norm/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 113870848, | |
| "file_name": "cache/pos_embedprojConv_38.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_0_transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_1": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 113873920, | |
| "file_name": "cache/pos_embedprojConv_39.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2088": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 119256064, | |
| "file_name": "cache/pos_embedprojConv_40.const", | |
| "file_size": 2691072 | |
| }, | |
| "/transformer_blocks.1/norm1_context/norm/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 121947136, | |
| "file_name": "cache/pos_embedprojConv_41.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.1/norm1_context/norm/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 121950208, | |
| "file_name": "cache/pos_embedprojConv_42.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_0_transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_1": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 121953280, | |
| "file_name": "cache/pos_embedprojConv_43.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_1972_onnx::MatMul_1969": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 127335424, | |
| "file_name": "cache/pos_embedprojConv_44.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_1973_onnx::MatMul_1970": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 132717568, | |
| "file_name": "cache/pos_embedprojConv_45.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_1974_onnx::MatMul_1971": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 138099712, | |
| "file_name": "cache/pos_embedprojConv_46.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_1988": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 143481856, | |
| "file_name": "cache/pos_embedprojConv_47.const", | |
| "file_size": 2691072 | |
| }, | |
| "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_2_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 146172928, | |
| "file_name": "cache/pos_embedprojConv_48.const", | |
| "file_size": 2691072 | |
| }, | |
| "/transformer_blocks.1/norm2/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 148864000, | |
| "file_name": "cache/pos_embedprojConv_49.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.1/norm2/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 148867072, | |
| "file_name": "cache/pos_embedprojConv_50.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_3_transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_4": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 148870144, | |
| "file_name": "cache/pos_embedprojConv_51.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_1990": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 154252288, | |
| "file_name": "cache/pos_embedprojConv_52.const", | |
| "file_size": 10764288 | |
| }, | |
| "onnx::MatMul_1991": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 165016576, | |
| "file_name": "cache/pos_embedprojConv_53.const", | |
| "file_size": 10764288 | |
| }, | |
| "transformer_blocks.1.norm1.linear.weight_5_1_6_27_2_5_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 175780864, | |
| "file_name": "cache/pos_embedprojConv_54.const", | |
| "file_size": 2691072 | |
| }, | |
| "onnx::MatMul_2089": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 178471936, | |
| "file_name": "cache/pos_embedprojConv_55.const", | |
| "file_size": 2691072 | |
| }, | |
| "onnx::MatMul_1989": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 181163008, | |
| "file_name": "cache/pos_embedprojConv_56.const", | |
| "file_size": 2691072 | |
| }, | |
| "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_2_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 183854080, | |
| "file_name": "cache/pos_embedprojConv_57.const", | |
| "file_size": 2691072 | |
| }, | |
| "/transformer_blocks.1/norm2_context/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 186545152, | |
| "file_name": "cache/pos_embedprojConv_58.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.1/norm2_context/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 186548224, | |
| "file_name": "cache/pos_embedprojConv_59.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_3_transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_4": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 186551296, | |
| "file_name": "cache/pos_embedprojConv_60.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_1992": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 191933440, | |
| "file_name": "cache/pos_embedprojConv_61.const", | |
| "file_size": 10764288 | |
| }, | |
| "onnx::MatMul_1993": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 202697728, | |
| "file_name": "cache/pos_embedprojConv_62.const", | |
| "file_size": 10764288 | |
| }, | |
| "transformer_blocks.1.norm1_context.linear.weight_5_1_7_27_3_5_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 213462016, | |
| "file_name": "cache/pos_embedprojConv_63.const", | |
| "file_size": 2691072 | |
| }, | |
| "/transformer_blocks.2/norm1/norm/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 216153088, | |
| "file_name": "cache/pos_embedprojConv_64.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.2/norm1/norm/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 216156160, | |
| "file_name": "cache/pos_embedprojConv_65.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_0_transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_1": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 216159232, | |
| "file_name": "cache/pos_embedprojConv_66.const", | |
| "file_size": 5382144 | |
| }, | |
| "/transformer_blocks.2/norm1_context/norm/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 221541376, | |
| "file_name": "cache/pos_embedprojConv_67.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.2/norm1_context/norm/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 221544448, | |
| "file_name": "cache/pos_embedprojConv_68.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_0_transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_1": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 221547520, | |
| "file_name": "cache/pos_embedprojConv_69.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_1997_onnx::MatMul_1994": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 226929664, | |
| "file_name": "cache/pos_embedprojConv_70.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_1998_onnx::MatMul_1995": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 232311808, | |
| "file_name": "cache/pos_embedprojConv_71.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_1999_onnx::MatMul_1996": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 237693952, | |
| "file_name": "cache/pos_embedprojConv_72.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2013": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 243076096, | |
| "file_name": "cache/pos_embedprojConv_73.const", | |
| "file_size": 2691072 | |
| }, | |
| "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_2_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 245767168, | |
| "file_name": "cache/pos_embedprojConv_74.const", | |
| "file_size": 2691072 | |
| }, | |
| "/transformer_blocks.2/norm2/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 248458240, | |
| "file_name": "cache/pos_embedprojConv_75.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.2/norm2/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 248461312, | |
| "file_name": "cache/pos_embedprojConv_76.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_3_transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_4": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 248464384, | |
| "file_name": "cache/pos_embedprojConv_77.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2015": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 253846528, | |
| "file_name": "cache/pos_embedprojConv_78.const", | |
| "file_size": 10764288 | |
| }, | |
| "onnx::MatMul_2016": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 264610816, | |
| "file_name": "cache/pos_embedprojConv_79.const", | |
| "file_size": 10764288 | |
| }, | |
| "transformer_blocks.2.norm1.linear.weight_5_1_8_27_4_5_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 275375104, | |
| "file_name": "cache/pos_embedprojConv_80.const", | |
| "file_size": 2691072 | |
| }, | |
| "onnx::MatMul_2090": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 278066176, | |
| "file_name": "cache/pos_embedprojConv_81.const", | |
| "file_size": 2691072 | |
| }, | |
| "onnx::MatMul_2014": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 280757248, | |
| "file_name": "cache/pos_embedprojConv_82.const", | |
| "file_size": 2691072 | |
| }, | |
| "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_2_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 283448320, | |
| "file_name": "cache/pos_embedprojConv_83.const", | |
| "file_size": 2691072 | |
| }, | |
| "/transformer_blocks.2/norm2_context/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 286139392, | |
| "file_name": "cache/pos_embedprojConv_84.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.2/norm2_context/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 286142464, | |
| "file_name": "cache/pos_embedprojConv_85.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_3_transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_4": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 286145536, | |
| "file_name": "cache/pos_embedprojConv_86.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2017": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 291527680, | |
| "file_name": "cache/pos_embedprojConv_87.const", | |
| "file_size": 10764288 | |
| }, | |
| "onnx::MatMul_2018": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 302291968, | |
| "file_name": "cache/pos_embedprojConv_88.const", | |
| "file_size": 10764288 | |
| }, | |
| "transformer_blocks.2.norm1_context.linear.weight_5_1_9_27_5_5_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 313056256, | |
| "file_name": "cache/pos_embedprojConv_89.const", | |
| "file_size": 2691072 | |
| }, | |
| "/transformer_blocks.3/norm1/norm/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 315747328, | |
| "file_name": "cache/pos_embedprojConv_90.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.3/norm1/norm/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 315750400, | |
| "file_name": "cache/pos_embedprojConv_91.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_0_transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_1": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 315753472, | |
| "file_name": "cache/pos_embedprojConv_92.const", | |
| "file_size": 5382144 | |
| }, | |
| "/transformer_blocks.3/norm1_context/norm/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 321135616, | |
| "file_name": "cache/pos_embedprojConv_93.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.3/norm1_context/norm/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 321138688, | |
| "file_name": "cache/pos_embedprojConv_94.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_0_transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_1": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 321141760, | |
| "file_name": "cache/pos_embedprojConv_95.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2022_onnx::MatMul_2019": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 326523904, | |
| "file_name": "cache/pos_embedprojConv_96.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2023_onnx::MatMul_2020": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 331906048, | |
| "file_name": "cache/pos_embedprojConv_97.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2024_onnx::MatMul_2021": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 337288192, | |
| "file_name": "cache/pos_embedprojConv_98.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2038": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 342670336, | |
| "file_name": "cache/pos_embedprojConv_99.const", | |
| "file_size": 2691072 | |
| }, | |
| "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_2_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 345361408, | |
| "file_name": "cache/pos_embedprojConv_100.const", | |
| "file_size": 2691072 | |
| }, | |
| "/transformer_blocks.3/norm2/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 348052480, | |
| "file_name": "cache/pos_embedprojConv_101.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.3/norm2/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 348055552, | |
| "file_name": "cache/pos_embedprojConv_102.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_3_transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_4": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 348058624, | |
| "file_name": "cache/pos_embedprojConv_103.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2040": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 353440768, | |
| "file_name": "cache/pos_embedprojConv_104.const", | |
| "file_size": 10764288 | |
| }, | |
| "onnx::MatMul_2041": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 364205056, | |
| "file_name": "cache/pos_embedprojConv_105.const", | |
| "file_size": 10764288 | |
| }, | |
| "transformer_blocks.3.norm1.linear.weight_5_1_10_27_6_5_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 374969344, | |
| "file_name": "cache/pos_embedprojConv_106.const", | |
| "file_size": 2691072 | |
| }, | |
| "onnx::MatMul_2091": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 377660416, | |
| "file_name": "cache/pos_embedprojConv_107.const", | |
| "file_size": 2691072 | |
| }, | |
| "onnx::MatMul_2039": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 380351488, | |
| "file_name": "cache/pos_embedprojConv_108.const", | |
| "file_size": 2691072 | |
| }, | |
| "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_2_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 383042560, | |
| "file_name": "cache/pos_embedprojConv_109.const", | |
| "file_size": 2691072 | |
| }, | |
| "/transformer_blocks.3/norm2_context/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 385733632, | |
| "file_name": "cache/pos_embedprojConv_110.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.3/norm2_context/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 385736704, | |
| "file_name": "cache/pos_embedprojConv_111.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_3_transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_4": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 385739776, | |
| "file_name": "cache/pos_embedprojConv_112.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2042": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 391121920, | |
| "file_name": "cache/pos_embedprojConv_113.const", | |
| "file_size": 10764288 | |
| }, | |
| "onnx::MatMul_2043": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 401886208, | |
| "file_name": "cache/pos_embedprojConv_114.const", | |
| "file_size": 10764288 | |
| }, | |
| "transformer_blocks.3.norm1_context.linear.weight_5_1_11_27_7_5_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 412650496, | |
| "file_name": "cache/pos_embedprojConv_115.const", | |
| "file_size": 2691072 | |
| }, | |
| "/transformer_blocks.4/norm1/norm/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 415341568, | |
| "file_name": "cache/pos_embedprojConv_116.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.4/norm1/norm/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 415344640, | |
| "file_name": "cache/pos_embedprojConv_117.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_0_transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_1": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 415347712, | |
| "file_name": "cache/pos_embedprojConv_118.const", | |
| "file_size": 5382144 | |
| }, | |
| "/transformer_blocks.4/norm1_context/norm/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 420729856, | |
| "file_name": "cache/pos_embedprojConv_119.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.4/norm1_context/norm/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 420732928, | |
| "file_name": "cache/pos_embedprojConv_120.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_0_transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_1": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 420736000, | |
| "file_name": "cache/pos_embedprojConv_121.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2047_onnx::MatMul_2044": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 426118144, | |
| "file_name": "cache/pos_embedprojConv_122.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2048_onnx::MatMul_2045": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 431500288, | |
| "file_name": "cache/pos_embedprojConv_123.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2049_onnx::MatMul_2046": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 436882432, | |
| "file_name": "cache/pos_embedprojConv_124.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2063": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 442264576, | |
| "file_name": "cache/pos_embedprojConv_125.const", | |
| "file_size": 2691072 | |
| }, | |
| "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_2_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 444955648, | |
| "file_name": "cache/pos_embedprojConv_126.const", | |
| "file_size": 2691072 | |
| }, | |
| "/transformer_blocks.4/norm2/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 447646720, | |
| "file_name": "cache/pos_embedprojConv_127.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.4/norm2/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 447649792, | |
| "file_name": "cache/pos_embedprojConv_128.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_3_transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_4": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 447652864, | |
| "file_name": "cache/pos_embedprojConv_129.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2065": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 453035008, | |
| "file_name": "cache/pos_embedprojConv_130.const", | |
| "file_size": 10764288 | |
| }, | |
| "onnx::MatMul_2066": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 463799296, | |
| "file_name": "cache/pos_embedprojConv_131.const", | |
| "file_size": 10764288 | |
| }, | |
| "transformer_blocks.4.norm1.linear.weight_5_1_12_27_8_5_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 474563584, | |
| "file_name": "cache/pos_embedprojConv_132.const", | |
| "file_size": 2691072 | |
| }, | |
| "onnx::MatMul_2092": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 477254656, | |
| "file_name": "cache/pos_embedprojConv_133.const", | |
| "file_size": 2691072 | |
| }, | |
| "onnx::MatMul_2064": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 479945728, | |
| "file_name": "cache/pos_embedprojConv_134.const", | |
| "file_size": 2691072 | |
| }, | |
| "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_2_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 482636800, | |
| "file_name": "cache/pos_embedprojConv_135.const", | |
| "file_size": 2691072 | |
| }, | |
| "/transformer_blocks.4/norm2_context/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 485327872, | |
| "file_name": "cache/pos_embedprojConv_136.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.4/norm2_context/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 485330944, | |
| "file_name": "cache/pos_embedprojConv_137.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_3_transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_4": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 485334016, | |
| "file_name": "cache/pos_embedprojConv_138.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2067": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 490716160, | |
| "file_name": "cache/pos_embedprojConv_139.const", | |
| "file_size": 10764288 | |
| }, | |
| "onnx::MatMul_2068": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 501480448, | |
| "file_name": "cache/pos_embedprojConv_140.const", | |
| "file_size": 10764288 | |
| }, | |
| "transformer_blocks.4.norm1_context.linear.weight_5_1_13_27_9_5_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 512244736, | |
| "file_name": "cache/pos_embedprojConv_141.const", | |
| "file_size": 2691072 | |
| }, | |
| "/transformer_blocks.5/norm1/norm/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 514935808, | |
| "file_name": "cache/pos_embedprojConv_142.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.5/norm1/norm/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 514938880, | |
| "file_name": "cache/pos_embedprojConv_143.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_0_transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_1": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 514941952, | |
| "file_name": "cache/pos_embedprojConv_144.const", | |
| "file_size": 5382144 | |
| }, | |
| "/transformer_blocks.5/norm1_context/norm/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 520324096, | |
| "file_name": "cache/pos_embedprojConv_145.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.5/norm1_context/norm/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 520327168, | |
| "file_name": "cache/pos_embedprojConv_146.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_0_transformer_blocks.5.norm1_context.linear.weight_5_1_15_27_11_1": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 520330240, | |
| "file_name": "cache/pos_embedprojConv_147.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2072_onnx::MatMul_2069": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 525712384, | |
| "file_name": "cache/pos_embedprojConv_148.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2073_onnx::MatMul_2070": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 531094528, | |
| "file_name": "cache/pos_embedprojConv_149.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2074_onnx::MatMul_2071": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 536476672, | |
| "file_name": "cache/pos_embedprojConv_150.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2085": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 541858816, | |
| "file_name": "cache/pos_embedprojConv_151.const", | |
| "file_size": 2691072 | |
| }, | |
| "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_2_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 544549888, | |
| "file_name": "cache/pos_embedprojConv_152.const", | |
| "file_size": 2691072 | |
| }, | |
| "/transformer_blocks.5/norm2/Constant_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 547240960, | |
| "file_name": "cache/pos_embedprojConv_153.const", | |
| "file_size": 3072 | |
| }, | |
| "/transformer_blocks.5/norm2/Constant_1_output_0": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfloat16", | |
| "shape": [ | |
| 1536 | |
| ], | |
| "size_in_bytes": 3072, | |
| "op_tensor_size": 3072, | |
| "offset": 547244032, | |
| "file_name": "cache/pos_embedprojConv_154.const", | |
| "file_size": 3072 | |
| }, | |
| "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_3_transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_4": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 5382144 | |
| ], | |
| "size_in_bytes": 5382144, | |
| "op_tensor_size": 5382144, | |
| "offset": 547247104, | |
| "file_name": "cache/pos_embedprojConv_155.const", | |
| "file_size": 5382144 | |
| }, | |
| "onnx::MatMul_2086": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 552629248, | |
| "file_name": "cache/pos_embedprojConv_156.const", | |
| "file_size": 10764288 | |
| }, | |
| "onnx::MatMul_2087": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 10764288 | |
| ], | |
| "size_in_bytes": 10764288, | |
| "op_tensor_size": 10764288, | |
| "offset": 563393536, | |
| "file_name": "cache/pos_embedprojConv_157.const", | |
| "file_size": 10764288 | |
| }, | |
| "transformer_blocks.5.norm1.linear.weight_5_1_14_27_10_5_gma": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "uint8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 574157824, | |
| "file_name": "cache/pos_embedprojConv_158.const", | |
| "file_size": 2691072 | |
| }, | |
| "onnx::MatMul_2093": { | |
| "packed_buffer_label": "const", | |
| "xrt_arg_id": 3, | |
| "dtype": "bfp16ebs8", | |
| "shape": [ | |
| 2691072 | |
| ], | |
| "size_in_bytes": 2691072, | |
| "op_tensor_size": 2691072, | |
| "offset": 576848896, | |
| "file_name": "cache/pos_embedprojConv_159.const", | |
| "file_size": 2691072 | |
| } | |
| }, | |
| "dynamic_shape_subgraph": true, | |
| "dynamic_shape_list": [ | |
| { | |
| "floor(h/2)": 32, | |
| "max_length + floor(h/2)*floor(w/2)": 1184, | |
| "h": 64, | |
| "w": 64, | |
| "floor(h/2)*floor(w/2)": 1024, | |
| "batch_size": 2, | |
| "max_length": 160, | |
| "floor(w/2)": 32 | |
| }, | |
| { | |
| "floor(h/2)": 32, | |
| "max_length + floor(h/2)*floor(w/2)": 1696, | |
| "h": 64, | |
| "w": 96, | |
| "floor(h/2)*floor(w/2)": 1536, | |
| "batch_size": 2, | |
| "max_length": 160, | |
| "floor(w/2)": 48 | |
| }, | |
| { | |
| "floor(h/2)": 48, | |
| "max_length + floor(h/2)*floor(w/2)": 1696, | |
| "h": 96, | |
| "w": 64, | |
| "floor(h/2)*floor(w/2)": 1536, | |
| "batch_size": 2, | |
| "max_length": 160, | |
| "floor(w/2)": 32 | |
| }, | |
| { | |
| "floor(h/2)": 36, | |
| "max_length + floor(h/2)*floor(w/2)": 2464, | |
| "h": 72, | |
| "w": 128, | |
| "floor(h/2)*floor(w/2)": 2304, | |
| "batch_size": 2, | |
| "max_length": 160, | |
| "floor(w/2)": 64 | |
| }, | |
| { | |
| "floor(h/2)": 64, | |
| "max_length + floor(h/2)*floor(w/2)": 2464, | |
| "h": 128, | |
| "w": 72, | |
| "floor(h/2)*floor(w/2)": 2304, | |
| "batch_size": 2, | |
| "max_length": 160, | |
| "floor(w/2)": 36 | |
| }, | |
| { | |
| "floor(h/2)": 48, | |
| "max_length + floor(h/2)*floor(w/2)": 3232, | |
| "h": 96, | |
| "w": 128, | |
| "floor(h/2)*floor(w/2)": 3072, | |
| "batch_size": 2, | |
| "max_length": 160, | |
| "floor(w/2)": 64 | |
| }, | |
| { | |
| "floor(h/2)": 64, | |
| "max_length + floor(h/2)*floor(w/2)": 3232, | |
| "h": 128, | |
| "w": 96, | |
| "floor(h/2)*floor(w/2)": 3072, | |
| "batch_size": 2, | |
| "max_length": 160, | |
| "floor(w/2)": 48 | |
| }, | |
| { | |
| "floor(h/2)": 64, | |
| "max_length + floor(h/2)*floor(w/2)": 4256, | |
| "h": 128, | |
| "w": 128, | |
| "floor(h/2)*floor(w/2)": 4096, | |
| "batch_size": 2, | |
| "max_length": 160, | |
| "floor(w/2)": 64 | |
| } | |
| ], | |
| "aux_info": {} | |
| } |