Upload TRT model for Nvidia H200

#7
.gitattributes CHANGED
@@ -36,3 +36,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
  model.onnx.data filter=lfs diff=lfs merge=lfs -text
38
  model_l40s_bf16.plan filter=lfs diff=lfs merge=lfs -text
 
 
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
  model.onnx.data filter=lfs diff=lfs merge=lfs -text
38
  model_l40s_bf16.plan filter=lfs diff=lfs merge=lfs -text
39
+ model_h200_bf16.plan filter=lfs diff=lfs merge=lfs -text
model_h200_bf16.plan ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e7265a49f23d6ed164a9d7aab7e4270e007d6077fcb374faf84d607fb68cccd
3
+ size 2005748132
trt_engine_layer_summary_h200_bf16.txt ADDED
@@ -0,0 +1,509 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---- Resolved TRT Profile ----
2
+ MIN_BATCH=1
3
+ OPT_BATCH=3
4
+ MAX_BATCH=12
5
+ MIN_SEQ_LEN=1
6
+ OPT_SEQ_LEN=512
7
+ MAX_SEQ_LEN=512
8
+ WORKSPACE_SIZE=24696061952
9
+ BUILDER_OPTIMIZATION_LEVEL=3
10
+ PRECISION=bf16
11
+
12
+ ==== TensorRT Engine ====
13
+ Total Layers: 479
14
+
15
+ ==== Precision Statistics ====
16
+ BFloat16: 1475
17
+ Int64: 17
18
+ Bool: 4
19
+ Float: 1
20
+
21
+ ==== Layer Type Statistics ====
22
+ kgen: 316
23
+ gemm: 157
24
+ shape_call: 2
25
+ reshape: 2
26
+ cast: 2
27
+
28
+ ==== Layer List ====
29
+ # Type Precision Name
30
+ ----------------------------------------------------------------------------------------------------
31
+ 0 shape_call - __mye182376_0_myl0_0
32
+ 1 kgen BFloat16 __myl_IotaCastReshCastGtrEqlOr_myl0_1
33
+ 2 kgen BFloat16 __myl_MoveMul_myl0_2
34
+ 3 kgen BFloat16 __myl_CastEqlReshGathMulSeleMulMeanAddSqrtDivMulMul_myl0_3
35
+ 4 gemm BFloat16 node_linear_2+node_linear_1+node_linear_myl0_4
36
+ 5 kgen BFloat16 __myl_ReshReshSlicRepl_myl0_5
37
+ 6 kgen BFloat16 __myl_ReshMulMean_myl0_6
38
+ 7 kgen BFloat16 __myl_ReshTranMulMean_myl0_7
39
+ 8 kgen BFloat16 __myl_ConcSinReshCosReshAddSqrtDivMulAddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTranEtc_myl0_8
40
+ 9 gemm BFloat16 node_MatMul_689_myl0_9
41
+ 10 kgen Int64 __myl_CastReplReshIotaCastReshSlicReplReshReshSlicReplReshConcCastGathReshReshSubNegLtEqlGtrOrLtEtc_myl0_10
42
+ 11 kgen BFloat16 __myl_MoveReplConc_myl0_11
43
+ 12 kgen BFloat16 __myl_MoveReplConc_myl0_12
44
+ 13 gemm BFloat16 node_scaled_dot_product_attention_myl0_13
45
+ 14 gemm BFloat16 node_linear_3_myl0_14
46
+ 15 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_15
47
+ 16 kgen BFloat16 __myl_DivMulMulReshTranReshAddReshMulMeanAddSqrtDivMulMul_myl0_16
48
+ 17 gemm BFloat16 node_linear_5+node_linear_4_myl0_17
49
+ 18 kgen BFloat16 __myl_MulMulMulAddMulTanhAddMulMulMul_myl0_18
50
+ 19 gemm BFloat16 node_linear_6_myl0_19
51
+ 20 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_20
52
+ 21 gemm BFloat16 node_linear_9+node_linear_8+node_linear_7_myl0_21
53
+ 22 kgen BFloat16 __myl_ReshReshSlicRepl_myl0_22
54
+ 23 kgen BFloat16 __myl_ReshTranMulMean_myl0_23
55
+ 24 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_24
56
+ 25 kgen BFloat16 __myl_ReshMulMean_myl0_25
57
+ 26 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_26
58
+ 27 gemm BFloat16 node_MatMul_874_myl0_27
59
+ 28 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_28
60
+ 29 kgen BFloat16 __myl_MoveReplConc_myl0_29
61
+ 30 kgen BFloat16 __myl_MoveReplConc_myl0_30
62
+ 31 gemm BFloat16 node_scaled_dot_product_attention_1_myl0_31
63
+ 32 gemm BFloat16 node_linear_10_myl0_32
64
+ 33 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_33
65
+ 34 gemm BFloat16 node_linear_12+node_linear_11_myl0_34
66
+ 35 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_35
67
+ 36 gemm BFloat16 node_linear_13_myl0_36
68
+ 37 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_37
69
+ 38 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_38
70
+ 39 gemm BFloat16 node_linear_16+node_linear_15+node_linear_14_myl0_39
71
+ 40 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_40
72
+ 41 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_41
73
+ 42 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_42
74
+ 43 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_43
75
+ 44 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_44
76
+ 45 gemm BFloat16 node_MatMul_1059_myl0_45
77
+ 46 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_46
78
+ 47 kgen BFloat16 __myl_MoveReplConc_myl0_47
79
+ 48 kgen BFloat16 __myl_MoveReplConc_myl0_48
80
+ 49 gemm BFloat16 node_scaled_dot_product_attention_2_myl0_49
81
+ 50 gemm BFloat16 node_linear_17_myl0_50
82
+ 51 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_51
83
+ 52 gemm BFloat16 node_linear_19+node_linear_18_myl0_52
84
+ 53 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_53
85
+ 54 gemm BFloat16 node_linear_20_myl0_54
86
+ 55 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_55
87
+ 56 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_56
88
+ 57 gemm BFloat16 node_linear_23+node_linear_22+node_linear_21_myl0_57
89
+ 58 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_58
90
+ 59 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_59
91
+ 60 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_60
92
+ 61 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_61
93
+ 62 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_62
94
+ 63 gemm BFloat16 node_MatMul_1244_myl0_63
95
+ 64 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_64
96
+ 65 kgen BFloat16 __myl_MoveReplConc_myl0_65
97
+ 66 kgen BFloat16 __myl_MoveReplConc_myl0_66
98
+ 67 gemm BFloat16 node_scaled_dot_product_attention_3_myl0_67
99
+ 68 gemm BFloat16 node_linear_24_myl0_68
100
+ 69 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_69
101
+ 70 gemm BFloat16 node_linear_26+node_linear_25_myl0_70
102
+ 71 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_71
103
+ 72 gemm BFloat16 node_linear_27_myl0_72
104
+ 73 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_73
105
+ 74 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_74
106
+ 75 gemm BFloat16 node_linear_30+node_linear_29+node_linear_28_myl0_75
107
+ 76 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_76
108
+ 77 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_77
109
+ 78 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_78
110
+ 79 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_79
111
+ 80 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_80
112
+ 81 gemm BFloat16 node_MatMul_1429_myl0_81
113
+ 82 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_82
114
+ 83 kgen BFloat16 __myl_MoveReplConc_myl0_83
115
+ 84 kgen BFloat16 __myl_MoveReplConc_myl0_84
116
+ 85 gemm BFloat16 node_scaled_dot_product_attention_4_myl0_85
117
+ 86 gemm BFloat16 node_linear_31_myl0_86
118
+ 87 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_87
119
+ 88 gemm BFloat16 node_linear_33+node_linear_32_myl0_88
120
+ 89 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_89
121
+ 90 gemm BFloat16 node_linear_34_myl0_90
122
+ 91 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_91
123
+ 92 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_92
124
+ 93 gemm BFloat16 node_linear_37+node_linear_36+node_linear_35_myl0_93
125
+ 94 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_94
126
+ 95 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_95
127
+ 96 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_96
128
+ 97 reshape Int64 __mye690105_myl0_97
129
+ 98 cast Int64 cast_hvar^690101i64_myl0_98
130
+ 99 reshape Int64 __mye690131_myl0_99
131
+ 100 cast Int64 cast_hvar^690127i64_myl0_100
132
+ 101 kgen BFloat16 __myl_ConcSinReshCosReshAddSqrtDivMulAddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTranEtc_myl0_101
133
+ 102 gemm BFloat16 node_MatMul_1614_myl0_102
134
+ 103 kgen BFloat16 __myl_ReshSlicReplReshReshSlicReplReshConcCastGathReshEqlGtrOrAndAndSeleSlicReshAddMaxrSubExpSumEtc_myl0_103
135
+ 104 kgen BFloat16 __myl_MoveReplConc_myl0_104
136
+ 105 kgen BFloat16 __myl_MoveReplConc_myl0_105
137
+ 106 gemm BFloat16 node_scaled_dot_product_attention_5_myl0_106
138
+ 107 gemm BFloat16 node_linear_38_myl0_107
139
+ 108 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_108
140
+ 109 gemm BFloat16 node_linear_40+node_linear_39_myl0_109
141
+ 110 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_110
142
+ 111 gemm BFloat16 node_linear_41_myl0_111
143
+ 112 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_112
144
+ 113 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_113
145
+ 114 gemm BFloat16 node_linear_44+node_linear_43+node_linear_42_myl0_114
146
+ 115 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_115
147
+ 116 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_116
148
+ 117 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_117
149
+ 118 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_118
150
+ 119 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_119
151
+ 120 gemm BFloat16 node_MatMul_1799_myl0_120
152
+ 121 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_121
153
+ 122 kgen BFloat16 __myl_MoveReplConc_myl0_122
154
+ 123 kgen BFloat16 __myl_MoveReplConc_myl0_123
155
+ 124 gemm BFloat16 node_scaled_dot_product_attention_6_myl0_124
156
+ 125 gemm BFloat16 node_linear_45_myl0_125
157
+ 126 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_126
158
+ 127 gemm BFloat16 node_linear_47+node_linear_46_myl0_127
159
+ 128 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_128
160
+ 129 gemm BFloat16 node_linear_48_myl0_129
161
+ 130 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_130
162
+ 131 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_131
163
+ 132 gemm BFloat16 node_linear_51+node_linear_50+node_linear_49_myl0_132
164
+ 133 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_133
165
+ 134 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_134
166
+ 135 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_135
167
+ 136 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_136
168
+ 137 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_137
169
+ 138 gemm BFloat16 node_MatMul_1984_myl0_138
170
+ 139 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_139
171
+ 140 kgen BFloat16 __myl_MoveReplConc_myl0_140
172
+ 141 kgen BFloat16 __myl_MoveReplConc_myl0_141
173
+ 142 gemm BFloat16 node_scaled_dot_product_attention_7_myl0_142
174
+ 143 gemm BFloat16 node_linear_52_myl0_143
175
+ 144 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_144
176
+ 145 gemm BFloat16 node_linear_54+node_linear_53_myl0_145
177
+ 146 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_146
178
+ 147 gemm BFloat16 node_linear_55_myl0_147
179
+ 148 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_148
180
+ 149 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_149
181
+ 150 gemm BFloat16 node_linear_58+node_linear_57+node_linear_56_myl0_150
182
+ 151 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_151
183
+ 152 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_152
184
+ 153 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_153
185
+ 154 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_154
186
+ 155 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_155
187
+ 156 gemm BFloat16 node_MatMul_2169_myl0_156
188
+ 157 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_157
189
+ 158 kgen BFloat16 __myl_MoveReplConc_myl0_158
190
+ 159 kgen BFloat16 __myl_MoveReplConc_myl0_159
191
+ 160 gemm BFloat16 node_scaled_dot_product_attention_8_myl0_160
192
+ 161 gemm BFloat16 node_linear_59_myl0_161
193
+ 162 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_162
194
+ 163 gemm BFloat16 node_linear_61+node_linear_60_myl0_163
195
+ 164 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_164
196
+ 165 gemm BFloat16 node_linear_62_myl0_165
197
+ 166 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_166
198
+ 167 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_167
199
+ 168 gemm BFloat16 node_linear_65+node_linear_64+node_linear_63_myl0_168
200
+ 169 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_169
201
+ 170 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_170
202
+ 171 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_171
203
+ 172 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_172
204
+ 173 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_173
205
+ 174 gemm BFloat16 node_MatMul_2354_myl0_174
206
+ 175 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_175
207
+ 176 kgen BFloat16 __myl_MoveReplConc_myl0_176
208
+ 177 kgen BFloat16 __myl_MoveReplConc_myl0_177
209
+ 178 gemm BFloat16 node_scaled_dot_product_attention_9_myl0_178
210
+ 179 gemm BFloat16 node_linear_66_myl0_179
211
+ 180 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_180
212
+ 181 gemm BFloat16 node_linear_68+node_linear_67_myl0_181
213
+ 182 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_182
214
+ 183 gemm BFloat16 node_linear_69_myl0_183
215
+ 184 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_184
216
+ 185 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_185
217
+ 186 gemm BFloat16 node_linear_72+node_linear_71+node_linear_70_myl0_186
218
+ 187 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_187
219
+ 188 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_188
220
+ 189 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_189
221
+ 190 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_190
222
+ 191 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_191
223
+ 192 gemm BFloat16 node_MatMul_2539_myl0_192
224
+ 193 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_193
225
+ 194 kgen BFloat16 __myl_MoveReplConc_myl0_194
226
+ 195 kgen BFloat16 __myl_MoveReplConc_myl0_195
227
+ 196 gemm BFloat16 node_scaled_dot_product_attention_10_myl0_196
228
+ 197 gemm BFloat16 node_linear_73_myl0_197
229
+ 198 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_198
230
+ 199 gemm BFloat16 node_linear_75+node_linear_74_myl0_199
231
+ 200 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_200
232
+ 201 gemm BFloat16 node_linear_76_myl0_201
233
+ 202 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_202
234
+ 203 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_203
235
+ 204 gemm BFloat16 node_linear_79+node_linear_78+node_linear_77_myl0_204
236
+ 205 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_205
237
+ 206 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_206
238
+ 207 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_207
239
+ 208 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_208
240
+ 209 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_209
241
+ 210 gemm BFloat16 node_MatMul_2724_myl0_210
242
+ 211 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_211
243
+ 212 kgen BFloat16 __myl_MoveReplConc_myl0_212
244
+ 213 kgen BFloat16 __myl_MoveReplConc_myl0_213
245
+ 214 gemm BFloat16 node_scaled_dot_product_attention_11_myl0_214
246
+ 215 gemm BFloat16 node_linear_80_myl0_215
247
+ 216 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_216
248
+ 217 gemm BFloat16 node_linear_82+node_linear_81_myl0_217
249
+ 218 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_218
250
+ 219 gemm BFloat16 node_linear_83_myl0_219
251
+ 220 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_220
252
+ 221 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_221
253
+ 222 gemm BFloat16 node_linear_86+node_linear_85+node_linear_84_myl0_222
254
+ 223 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_223
255
+ 224 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_224
256
+ 225 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_225
257
+ 226 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_226
258
+ 227 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_227
259
+ 228 gemm BFloat16 node_MatMul_2909_myl0_228
260
+ 229 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_229
261
+ 230 kgen BFloat16 __myl_MoveReplConc_myl0_230
262
+ 231 kgen BFloat16 __myl_MoveReplConc_myl0_231
263
+ 232 gemm BFloat16 node_scaled_dot_product_attention_12_myl0_232
264
+ 233 gemm BFloat16 node_linear_87_myl0_233
265
+ 234 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_234
266
+ 235 gemm BFloat16 node_linear_89+node_linear_88_myl0_235
267
+ 236 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_236
268
+ 237 gemm BFloat16 node_linear_90_myl0_237
269
+ 238 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_238
270
+ 239 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_239
271
+ 240 gemm BFloat16 node_linear_93+node_linear_92+node_linear_91_myl0_240
272
+ 241 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_241
273
+ 242 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_242
274
+ 243 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_243
275
+ 244 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_244
276
+ 245 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_245
277
+ 246 gemm BFloat16 node_MatMul_3094_myl0_246
278
+ 247 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_247
279
+ 248 kgen BFloat16 __myl_MoveReplConc_myl0_248
280
+ 249 kgen BFloat16 __myl_MoveReplConc_myl0_249
281
+ 250 gemm BFloat16 node_scaled_dot_product_attention_13_myl0_250
282
+ 251 gemm BFloat16 node_linear_94_myl0_251
283
+ 252 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_252
284
+ 253 gemm BFloat16 node_linear_96+node_linear_95_myl0_253
285
+ 254 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_254
286
+ 255 gemm BFloat16 node_linear_97_myl0_255
287
+ 256 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_256
288
+ 257 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_257
289
+ 258 gemm BFloat16 node_linear_100+node_linear_99+node_linear_98_myl0_258
290
+ 259 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_259
291
+ 260 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_260
292
+ 261 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_261
293
+ 262 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_262
294
+ 263 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_263
295
+ 264 gemm BFloat16 node_MatMul_3279_myl0_264
296
+ 265 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_265
297
+ 266 kgen BFloat16 __myl_MoveReplConc_myl0_266
298
+ 267 kgen BFloat16 __myl_MoveReplConc_myl0_267
299
+ 268 gemm BFloat16 node_scaled_dot_product_attention_14_myl0_268
300
+ 269 gemm BFloat16 node_linear_101_myl0_269
301
+ 270 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_270
302
+ 271 gemm BFloat16 node_linear_103+node_linear_102_myl0_271
303
+ 272 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_272
304
+ 273 gemm BFloat16 node_linear_104_myl0_273
305
+ 274 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_274
306
+ 275 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_275
307
+ 276 gemm BFloat16 node_linear_107+node_linear_106+node_linear_105_myl0_276
308
+ 277 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_277
309
+ 278 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_278
310
+ 279 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_279
311
+ 280 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_280
312
+ 281 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_281
313
+ 282 gemm BFloat16 node_MatMul_3464_myl0_282
314
+ 283 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_283
315
+ 284 kgen BFloat16 __myl_MoveReplConc_myl0_284
316
+ 285 kgen BFloat16 __myl_MoveReplConc_myl0_285
317
+ 286 gemm BFloat16 node_scaled_dot_product_attention_15_myl0_286
318
+ 287 gemm BFloat16 node_linear_108_myl0_287
319
+ 288 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_288
320
+ 289 gemm BFloat16 node_linear_110+node_linear_109_myl0_289
321
+ 290 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_290
322
+ 291 gemm BFloat16 node_linear_111_myl0_291
323
+ 292 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_292
324
+ 293 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_293
325
+ 294 gemm BFloat16 node_linear_114+node_linear_113+node_linear_112_myl0_294
326
+ 295 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_295
327
+ 296 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_296
328
+ 297 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_297
329
+ 298 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_298
330
+ 299 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_299
331
+ 300 gemm BFloat16 node_MatMul_3649_myl0_300
332
+ 301 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_301
333
+ 302 kgen BFloat16 __myl_MoveReplConc_myl0_302
334
+ 303 kgen BFloat16 __myl_MoveReplConc_myl0_303
335
+ 304 gemm BFloat16 node_scaled_dot_product_attention_16_myl0_304
336
+ 305 gemm BFloat16 node_linear_115_myl0_305
337
+ 306 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_306
338
+ 307 gemm BFloat16 node_linear_117+node_linear_116_myl0_307
339
+ 308 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_308
340
+ 309 gemm BFloat16 node_linear_118_myl0_309
341
+ 310 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_310
342
+ 311 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_311
343
+ 312 gemm BFloat16 node_linear_121+node_linear_120+node_linear_119_myl0_312
344
+ 313 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_313
345
+ 314 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_314
346
+ 315 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_315
347
+ 316 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_316
348
+ 317 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_317
349
+ 318 gemm BFloat16 node_MatMul_3834_myl0_318
350
+ 319 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_319
351
+ 320 kgen BFloat16 __myl_MoveReplConc_myl0_320
352
+ 321 kgen BFloat16 __myl_MoveReplConc_myl0_321
353
+ 322 gemm BFloat16 node_scaled_dot_product_attention_17_myl0_322
354
+ 323 gemm BFloat16 node_linear_122_myl0_323
355
+ 324 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_324
356
+ 325 gemm BFloat16 node_linear_124+node_linear_123_myl0_325
357
+ 326 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_326
358
+ 327 gemm BFloat16 node_linear_125_myl0_327
359
+ 328 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_328
360
+ 329 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_329
361
+ 330 gemm BFloat16 node_linear_128+node_linear_127+node_linear_126_myl0_330
362
+ 331 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_331
363
+ 332 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_332
364
+ 333 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_333
365
+ 334 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_334
366
+ 335 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_335
367
+ 336 gemm BFloat16 node_MatMul_4019_myl0_336
368
+ 337 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_337
369
+ 338 kgen BFloat16 __myl_MoveReplConc_myl0_338
370
+ 339 kgen BFloat16 __myl_MoveReplConc_myl0_339
371
+ 340 gemm BFloat16 node_scaled_dot_product_attention_18_myl0_340
372
+ 341 gemm BFloat16 node_linear_129_myl0_341
373
+ 342 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_342
374
+ 343 gemm BFloat16 node_linear_131+node_linear_130_myl0_343
375
+ 344 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_344
376
+ 345 gemm BFloat16 node_linear_132_myl0_345
377
+ 346 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_346
378
+ 347 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_347
379
+ 348 gemm BFloat16 node_linear_135+node_linear_134+node_linear_133_myl0_348
380
+ 349 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_349
381
+ 350 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_350
382
+ 351 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_351
383
+ 352 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_352
384
+ 353 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_353
385
+ 354 gemm BFloat16 node_MatMul_4204_myl0_354
386
+ 355 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_355
387
+ 356 kgen BFloat16 __myl_MoveReplConc_myl0_356
388
+ 357 kgen BFloat16 __myl_MoveReplConc_myl0_357
389
+ 358 gemm BFloat16 node_scaled_dot_product_attention_19_myl0_358
390
+ 359 gemm BFloat16 node_linear_136_myl0_359
391
+ 360 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_360
392
+ 361 gemm BFloat16 node_linear_138+node_linear_137_myl0_361
393
+ 362 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_362
394
+ 363 gemm BFloat16 node_linear_139_myl0_363
395
+ 364 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_364
396
+ 365 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_365
397
+ 366 gemm BFloat16 node_linear_142+node_linear_141+node_linear_140_myl0_366
398
+ 367 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_367
399
+ 368 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_368
400
+ 369 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_369
401
+ 370 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_370
402
+ 371 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_371
403
+ 372 gemm BFloat16 node_MatMul_4389_myl0_372
404
+ 373 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_373
405
+ 374 kgen BFloat16 __myl_MoveReplConc_myl0_374
406
+ 375 kgen BFloat16 __myl_MoveReplConc_myl0_375
407
+ 376 gemm BFloat16 node_scaled_dot_product_attention_20_myl0_376
408
+ 377 gemm BFloat16 node_linear_143_myl0_377
409
+ 378 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_378
410
+ 379 gemm BFloat16 node_linear_145+node_linear_144_myl0_379
411
+ 380 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_380
412
+ 381 gemm BFloat16 node_linear_146_myl0_381
413
+ 382 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_382
414
+ 383 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_383
415
+ 384 gemm BFloat16 node_linear_149+node_linear_148+node_linear_147_myl0_384
416
+ 385 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_385
417
+ 386 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_386
418
+ 387 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_387
419
+ 388 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_388
420
+ 389 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_389
421
+ 390 gemm BFloat16 node_MatMul_4574_myl0_390
422
+ 391 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_391
423
+ 392 kgen BFloat16 __myl_MoveReplConc_myl0_392
424
+ 393 kgen BFloat16 __myl_MoveReplConc_myl0_393
425
+ 394 gemm BFloat16 node_scaled_dot_product_attention_21_myl0_394
426
+ 395 gemm BFloat16 node_linear_150_myl0_395
427
+ 396 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_396
428
+ 397 gemm BFloat16 node_linear_152+node_linear_151_myl0_397
429
+ 398 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_398
430
+ 399 gemm BFloat16 node_linear_153_myl0_399
431
+ 400 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_400
432
+ 401 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_401
433
+ 402 gemm BFloat16 node_linear_156+node_linear_155+node_linear_154_myl0_402
434
+ 403 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_403
435
+ 404 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_404
436
+ 405 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_405
437
+ 406 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_406
438
+ 407 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_407
439
+ 408 gemm BFloat16 node_MatMul_4759_myl0_408
440
+ 409 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_409
441
+ 410 kgen BFloat16 __myl_MoveReplConc_myl0_410
442
+ 411 kgen BFloat16 __myl_MoveReplConc_myl0_411
443
+ 412 gemm BFloat16 node_scaled_dot_product_attention_22_myl0_412
444
+ 413 gemm BFloat16 node_linear_157_myl0_413
445
+ 414 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_414
446
+ 415 gemm BFloat16 node_linear_159+node_linear_158_myl0_415
447
+ 416 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_416
448
+ 417 gemm BFloat16 node_linear_160_myl0_417
449
+ 418 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_418
450
+ 419 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_419
451
+ 420 gemm BFloat16 node_linear_163+node_linear_162+node_linear_161_myl0_420
452
+ 421 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_421
453
+ 422 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_422
454
+ 423 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_423
455
+ 424 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_424
456
+ 425 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_425
457
+ 426 gemm BFloat16 node_MatMul_4944_myl0_426
458
+ 427 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_427
459
+ 428 kgen BFloat16 __myl_MoveReplConc_myl0_428
460
+ 429 kgen BFloat16 __myl_MoveReplConc_myl0_429
461
+ 430 gemm BFloat16 node_scaled_dot_product_attention_23_myl0_430
462
+ 431 gemm BFloat16 node_linear_164_myl0_431
463
+ 432 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_432
464
+ 433 gemm BFloat16 node_linear_166+node_linear_165_myl0_433
465
+ 434 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_434
466
+ 435 gemm BFloat16 node_linear_167_myl0_435
467
+ 436 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_436
468
+ 437 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_437
469
+ 438 gemm BFloat16 node_linear_170+node_linear_169+node_linear_168_myl0_438
470
+ 439 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_439
471
+ 440 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_440
472
+ 441 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_441
473
+ 442 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_442
474
+ 443 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_443
475
+ 444 gemm BFloat16 node_MatMul_5129_myl0_444
476
+ 445 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_445
477
+ 446 kgen BFloat16 __myl_MoveReplConc_myl0_446
478
+ 447 kgen BFloat16 __myl_MoveReplConc_myl0_447
479
+ 448 gemm BFloat16 node_scaled_dot_product_attention_24_myl0_448
480
+ 449 gemm BFloat16 node_linear_171_myl0_449
481
+ 450 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_450
482
+ 451 gemm BFloat16 node_linear_173+node_linear_172_myl0_451
483
+ 452 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_452
484
+ 453 gemm BFloat16 node_linear_174_myl0_453
485
+ 454 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_454
486
+ 455 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMeanAddSqrtDivMulMul_myl0_455
487
+ 456 gemm BFloat16 node_linear_177+node_linear_176+node_linear_175_myl0_456
488
+ 457 kgen BFloat16 __myl_MoveReshTranReshReshSlicRepl_myl0_457
489
+ 458 kgen BFloat16 __myl_MoveReshTranReshTranMulMean_myl0_458
490
+ 459 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAdd_myl0_459
491
+ 460 kgen BFloat16 __myl_MoveReshTranReshReshMulMean_myl0_460
492
+ 461 kgen BFloat16 __myl_AddSqrtDivMulMulSlicNegSlicConcMulMulAddReshSlicReplReshTran_myl0_461
493
+ 462 gemm BFloat16 node_MatMul_5314_myl0_462
494
+ 463 kgen BFloat16 __myl_SlicReshAddMaxrSubExpSumDivMulIsnaSele_myl0_463
495
+ 464 kgen BFloat16 __myl_MoveReplConc_myl0_464
496
+ 465 kgen BFloat16 __myl_MoveReplConc_myl0_465
497
+ 466 gemm BFloat16 node_scaled_dot_product_attention_25_myl0_466
498
+ 467 gemm BFloat16 node_linear_178_myl0_467
499
+ 468 kgen BFloat16 __myl_MulMeanAddSqrtDivMulMulAddMulMeanAddSqrtDivMulMul_myl0_468
500
+ 469 gemm BFloat16 node_linear_180+node_linear_179_myl0_469
501
+ 470 kgen BFloat16 __myl_CastSum_myl0_470
502
+ 471 kgen BFloat16 __myl_MoveReshTranReshMulMulMulAddMulTanhAddMulMulMoveReshTranReshMul_myl0_471
503
+ 472 gemm BFloat16 node_linear_181_myl0_472
504
+ 473 kgen BFloat16 __myl_MulMeanAddSqrt_myl0_473
505
+ 474 kgen BFloat16 __myl_DivMulMulReshTranReshAddMulMean_myl0_474
506
+ 475 kgen BFloat16 __myl_ReshMaxMinAddSqrtDivMulMulMoveReshTranReshMoveReshMulSumReshDivMul_myl0_475
507
+ 476 gemm BFloat16 node_linear_182_myl0_476
508
+ 477 shape_call - __mye26_0_myl1_0
509
+ 478 kgen Float __myl_Cast_myl1_1