File size: 301,906 Bytes
ca600aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
program(1.0)
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}})]
{
    func main<ios17>(tensor<fp32, [1, 1, 768]> audio_emb, tensor<fp32, [1, 256]> encoder_mask, tensor<fp32, [1, 256, 768]> encoder_output, tensor<int32, [1]> position, tensor<fp32, [1, 600, 12, 64]> sa_k_in_0, tensor<fp32, [1, 600, 12, 64]> sa_k_in_1, tensor<fp32, [1, 600, 12, 64]> sa_k_in_10, tensor<fp32, [1, 600, 12, 64]> sa_k_in_11, tensor<fp32, [1, 600, 12, 64]> sa_k_in_2, tensor<fp32, [1, 600, 12, 64]> sa_k_in_3, tensor<fp32, [1, 600, 12, 64]> sa_k_in_4, tensor<fp32, [1, 600, 12, 64]> sa_k_in_5, tensor<fp32, [1, 600, 12, 64]> sa_k_in_6, tensor<fp32, [1, 600, 12, 64]> sa_k_in_7, tensor<fp32, [1, 600, 12, 64]> sa_k_in_8, tensor<fp32, [1, 600, 12, 64]> sa_k_in_9, tensor<fp32, [1, 600, 12, 64]> sa_v_in_0, tensor<fp32, [1, 600, 12, 64]> sa_v_in_1, tensor<fp32, [1, 600, 12, 64]> sa_v_in_10, tensor<fp32, [1, 600, 12, 64]> sa_v_in_11, tensor<fp32, [1, 600, 12, 64]> sa_v_in_2, tensor<fp32, [1, 600, 12, 64]> sa_v_in_3, tensor<fp32, [1, 600, 12, 64]> sa_v_in_4, tensor<fp32, [1, 600, 12, 64]> sa_v_in_5, tensor<fp32, [1, 600, 12, 64]> sa_v_in_6, tensor<fp32, [1, 600, 12, 64]> sa_v_in_7, tensor<fp32, [1, 600, 12, 64]> sa_v_in_8, tensor<fp32, [1, 600, 12, 64]> sa_v_in_9, tensor<fp32, [1, 256, 1, 128]> xa_k_0, tensor<fp32, [1, 256, 1, 128]> xa_k_1, tensor<fp32, [1, 256, 1, 128]> xa_k_10, tensor<fp32, [1, 256, 1, 128]> xa_k_11, tensor<fp32, [1, 256, 1, 128]> xa_k_2, tensor<fp32, [1, 256, 1, 128]> xa_k_3, tensor<fp32, [1, 256, 1, 128]> xa_k_4, tensor<fp32, [1, 256, 1, 128]> xa_k_5, tensor<fp32, [1, 256, 1, 128]> xa_k_6, tensor<fp32, [1, 256, 1, 128]> xa_k_7, tensor<fp32, [1, 256, 1, 128]> xa_k_8, tensor<fp32, [1, 256, 1, 128]> xa_k_9, tensor<fp32, [1, 256, 1, 128]> xa_v_0, tensor<fp32, [1, 256, 1, 128]> xa_v_1, tensor<fp32, [1, 256, 1, 128]> xa_v_10, tensor<fp32, [1, 256, 1, 128]> xa_v_11, tensor<fp32, [1, 256, 1, 128]> xa_v_2, tensor<fp32, [1, 256, 1, 128]> xa_v_3, tensor<fp32, [1, 256, 1, 128]> xa_v_4, tensor<fp32, [1, 256, 1, 128]> xa_v_5, tensor<fp32, [1, 256, 1, 128]> xa_v_6, tensor<fp32, [1, 256, 1, 128]> xa_v_7, tensor<fp32, [1, 256, 1, 128]> xa_v_8, tensor<fp32, [1, 256, 1, 128]> xa_v_9) {
            tensor<int32, []> var_502_batch_dims_0 = const()[name = tensor<string, []>("op_502_batch_dims_0"), val = tensor<int32, []>(0)];
            tensor<bool, []> var_502_validate_indices_0 = const()[name = tensor<string, []>("op_502_validate_indices_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [2048, 768]> dec_position_embeddings_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_position_embeddings_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2048, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64))), scale = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1575104))), zero_point = tensor<int8, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1572992)))];
            tensor<string, []> position_to_int16_dtype_0 = const()[name = tensor<string, []>("position_to_int16_dtype_0"), val = tensor<string, []>("int16")];
            tensor<string, []> cast_63_dtype_0 = const()[name = tensor<string, []>("cast_63_dtype_0"), val = tensor<string, []>("int32")];
            tensor<int32, []> greater_equal_0_y_0 = const()[name = tensor<string, []>("greater_equal_0_y_0"), val = tensor<int32, []>(0)];
            tensor<int16, [1]> position_to_int16 = cast(dtype = position_to_int16_dtype_0, x = position)[name = tensor<string, []>("cast_83")];
            tensor<int32, [1]> cast_63 = cast(dtype = cast_63_dtype_0, x = position_to_int16)[name = tensor<string, []>("cast_82")];
            tensor<bool, [1]> greater_equal_0 = greater_equal(x = cast_63, y = greater_equal_0_y_0)[name = tensor<string, []>("greater_equal_0")];
            tensor<int32, []> slice_by_index_0 = const()[name = tensor<string, []>("slice_by_index_0"), val = tensor<int32, []>(2048)];
            tensor<int32, [1]> add_0 = add(x = cast_63, y = slice_by_index_0)[name = tensor<string, []>("add_0")];
            tensor<int32, [1]> select_0 = select(a = cast_63, b = add_0, cond = greater_equal_0)[name = tensor<string, []>("select_0")];
            tensor<string, []> select_0_to_int16_dtype_0 = const()[name = tensor<string, []>("select_0_to_int16_dtype_0"), val = tensor<string, []>("int16")];
            tensor<string, []> cast_0_dtype_0 = const()[name = tensor<string, []>("cast_0_dtype_0"), val = tensor<string, []>("int32")];
            tensor<int32, []> greater_equal_0_y_0_1 = const()[name = tensor<string, []>("greater_equal_0_y_0_1"), val = tensor<int32, []>(0)];
            tensor<int16, [1]> select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = tensor<string, []>("cast_81")];
            tensor<int32, [1]> cast_0 = cast(dtype = cast_0_dtype_0, x = select_0_to_int16)[name = tensor<string, []>("cast_80")];
            tensor<bool, [1]> greater_equal_0_1 = greater_equal(x = cast_0, y = greater_equal_0_y_0_1)[name = tensor<string, []>("greater_equal_0_1")];
            tensor<int32, []> slice_by_index_0_1 = const()[name = tensor<string, []>("slice_by_index_0_1"), val = tensor<int32, []>(2048)];
            tensor<int32, [1]> add_0_1 = add(x = cast_0, y = slice_by_index_0_1)[name = tensor<string, []>("add_0_1")];
            tensor<int32, [1]> select_0_1 = select(a = cast_0, b = add_0_1, cond = greater_equal_0_1)[name = tensor<string, []>("select_0_1")];
            tensor<int32, []> op_502_cast_fp16_cast_uint16_cast_uint16_axis_0 = const()[name = tensor<string, []>("op_502_cast_fp16_cast_uint16_cast_uint16_axis_0"), val = tensor<int32, []>(0)];
            tensor<fp16, [1, 768]> op_502_cast_fp16_cast_uint16_cast_uint16 = gather(axis = op_502_cast_fp16_cast_uint16_cast_uint16_axis_0, batch_dims = var_502_batch_dims_0, indices = select_0_1, validate_indices = var_502_validate_indices_0, x = dec_position_embeddings_weight_to_fp16_quantized)[name = tensor<string, []>("op_502_cast_fp16_cast_uint16_cast_uint16")];
            tensor<string, []> audio_emb_to_fp16_dtype_0 = const()[name = tensor<string, []>("audio_emb_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 1, 768]> audio_emb_to_fp16 = cast(dtype = audio_emb_to_fp16_dtype_0, x = audio_emb)[name = tensor<string, []>("cast_79")];
            tensor<fp16, [1, 1, 768]> input_3_cast_fp16 = add(x = audio_emb_to_fp16, y = op_502_cast_fp16_cast_uint16_cast_uint16)[name = tensor<string, []>("input_3_cast_fp16")];
            tensor<fp16, [600]> idx_range_promoted_to_fp16 = const()[name = tensor<string, []>("idx_range_promoted_to_fp16"), val = tensor<fp16, [600]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1579264)))];
            tensor<string, []> var_515_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_515_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1]> position_to_fp16 = cast(dtype = var_515_to_fp16_dtype_0, x = position)[name = tensor<string, []>("cast_78")];
            tensor<bool, [600]> var_516_cast_fp16 = less_equal(x = idx_range_promoted_to_fp16, y = position_to_fp16)[name = tensor<string, []>("op_516_cast_fp16")];
            tensor<int32, [1]> sa_key_mask_axes_0 = const()[name = tensor<string, []>("sa_key_mask_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<string, []> sa_key_mask_1_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_key_mask_1_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [600]> var_516_cast_fp16_to_fp16 = cast(dtype = sa_key_mask_1_to_fp16_dtype_0, x = var_516_cast_fp16)[name = tensor<string, []>("cast_77")];
            tensor<fp16, [1, 600]> sa_key_mask_cast_fp16 = expand_dims(axes = sa_key_mask_axes_0, x = var_516_cast_fp16_to_fp16)[name = tensor<string, []>("sa_key_mask_cast_fp16")];
            tensor<int32, [1]> input_5_axes_0 = const()[name = tensor<string, []>("input_5_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_0_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_0_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1580544)))];
            tensor<fp16, []> var_525_to_fp16 = const()[name = tensor<string, []>("op_525_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_5_cast_fp16 = layer_norm(axes = input_5_axes_0, epsilon = var_525_to_fp16, gamma = dec_layers_0_norm_self_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1582144))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3354048))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [2304]> linear_0_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_0_bias_0_to_fp16"), val = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3358720)))];
            tensor<fp16, [1, 1, 2304]> linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized, x = input_5_cast_fp16)[name = tensor<string, []>("linear_0_cast_fp16")];
            tensor<int32, [5]> var_539 = const()[name = tensor<string, []>("op_539"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_3_cast_fp16 = reshape(shape = var_539, x = linear_0_cast_fp16)[name = tensor<string, []>("qkv_3_cast_fp16")];
            tensor<int32, [5]> q_1_begin_0 = const()[name = tensor<string, []>("q_1_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_1_end_0 = const()[name = tensor<string, []>("q_1_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_1_end_mask_0 = const()[name = tensor<string, []>("q_1_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_1_squeeze_mask_0 = const()[name = tensor<string, []>("q_1_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_1_cast_fp16 = slice_by_index(begin = q_1_begin_0, end = q_1_end_0, end_mask = q_1_end_mask_0, squeeze_mask = q_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor<string, []>("q_1_cast_fp16")];
            tensor<int32, [5]> new_k_1_begin_0 = const()[name = tensor<string, []>("new_k_1_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_1_end_0 = const()[name = tensor<string, []>("new_k_1_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_1_end_mask_0 = const()[name = tensor<string, []>("new_k_1_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_1_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_1_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_1_cast_fp16 = slice_by_index(begin = new_k_1_begin_0, end = new_k_1_end_0, end_mask = new_k_1_end_mask_0, squeeze_mask = new_k_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor<string, []>("new_k_1_cast_fp16")];
            tensor<int32, [5]> new_v_1_begin_0 = const()[name = tensor<string, []>("new_v_1_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_1_end_0 = const()[name = tensor<string, []>("new_v_1_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_1_end_mask_0 = const()[name = tensor<string, []>("new_v_1_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_1_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_1_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_1_cast_fp16 = slice_by_index(begin = new_v_1_begin_0, end = new_v_1_end_0, end_mask = new_v_1_end_mask_0, squeeze_mask = new_v_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor<string, []>("new_v_1_cast_fp16")];
            tensor<bool, [600]> var_585_cast_fp16 = equal(x = idx_range_promoted_to_fp16, y = position_to_fp16)[name = tensor<string, []>("op_585_cast_fp16")];
            tensor<int32, [4]> var_595 = const()[name = tensor<string, []>("op_595"), val = tensor<int32, [4]>([1, 600, 1, 1])];
            tensor<string, []> write_oh_1_to_fp16_dtype_0 = const()[name = tensor<string, []>("write_oh_1_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [600]> var_585_cast_fp16_to_fp16 = cast(dtype = write_oh_1_to_fp16_dtype_0, x = var_585_cast_fp16)[name = tensor<string, []>("cast_76")];
            tensor<fp16, [1, 600, 1, 1]> write_oh_b_1_cast_fp16 = reshape(shape = var_595, x = var_585_cast_fp16_to_fp16)[name = tensor<string, []>("write_oh_b_1_cast_fp16")];
            tensor<fp16, []> var_597_to_fp16 = const()[name = tensor<string, []>("op_597_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
            tensor<fp16, [1, 600, 1, 1]> var_599_cast_fp16 = sub(x = var_597_to_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_599_cast_fp16")];
            tensor<string, []> sa_k_in_0_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_0_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_0_to_fp16 = cast(dtype = sa_k_in_0_to_fp16_dtype_0, x = sa_k_in_0)[name = tensor<string, []>("cast_75")];
            tensor<fp16, [1, 600, 12, 64]> var_600_cast_fp16 = mul(x = sa_k_in_0_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_600_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_601_cast_fp16 = mul(x = new_k_1_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_601_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_1_cast_fp16 = add(x = var_600_cast_fp16, y = var_601_cast_fp16)[name = tensor<string, []>("sa_k_out_1_cast_fp16")];
            tensor<string, []> sa_k_out_1_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_1_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_0_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_0_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_0_to_fp16 = cast(dtype = sa_v_in_0_to_fp16_dtype_0, x = sa_v_in_0)[name = tensor<string, []>("cast_74")];
            tensor<fp16, [1, 600, 12, 64]> var_607_cast_fp16 = mul(x = sa_v_in_0_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_607_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_608_cast_fp16 = mul(x = new_v_1_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_608_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_1_cast_fp16 = add(x = var_607_cast_fp16, y = var_608_cast_fp16)[name = tensor<string, []>("sa_v_out_1_cast_fp16")];
            tensor<string, []> sa_v_out_1_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_1_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_627 = const()[name = tensor<string, []>("op_627"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_629_transpose_x_0 = const()[name = tensor<string, []>("op_629_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_629_transpose_y_0 = const()[name = tensor<string, []>("op_629_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_96_perm_0 = const()[name = tensor<string, []>("transpose_96_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_97_perm_0 = const()[name = tensor<string, []>("transpose_97_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_97 = transpose(perm = transpose_97_perm_0, x = sa_k_out_1_cast_fp16)[name = tensor<string, []>("transpose_262")];
            tensor<fp16, [1, 12, 1, 64]> transpose_96 = transpose(perm = transpose_96_perm_0, x = q_1_cast_fp16)[name = tensor<string, []>("transpose_263")];
            tensor<fp16, [1, 12, 1, 600]> var_629_cast_fp16 = matmul(transpose_x = var_629_transpose_x_0, transpose_y = var_629_transpose_y_0, x = transpose_96, y = transpose_97)[name = tensor<string, []>("op_629_cast_fp16")];
            tensor<fp16, []> var_630_to_fp16 = const()[name = tensor<string, []>("op_630_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_1_cast_fp16 = mul(x = var_629_cast_fp16, y = var_630_to_fp16)[name = tensor<string, []>("scores_1_cast_fp16")];
            tensor<int32, [1]> var_638_axes_0 = const()[name = tensor<string, []>("op_638_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [1, 1, 600]> var_638_cast_fp16 = expand_dims(axes = var_638_axes_0, x = sa_key_mask_cast_fp16)[name = tensor<string, []>("op_638_cast_fp16")];
            tensor<int32, [1]> var_640_axes_0 = const()[name = tensor<string, []>("op_640_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 1, 1, 600]> var_640_cast_fp16 = expand_dims(axes = var_640_axes_0, x = var_638_cast_fp16)[name = tensor<string, []>("op_640_cast_fp16")];
            tensor<fp16, []> var_646_promoted_to_fp16 = const()[name = tensor<string, []>("op_646_promoted_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
            tensor<bool, [1, 1, 1, 600]> var_647_cast_fp16 = equal(x = var_640_cast_fp16, y = var_646_promoted_to_fp16)[name = tensor<string, []>("op_647_cast_fp16")];
            tensor<fp16, []> var_648_to_fp16 = const()[name = tensor<string, []>("op_648_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_3_cast_fp16 = select(a = var_648_to_fp16, b = scores_1_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_3_cast_fp16")];
            tensor<int32, []> var_650 = const()[name = tensor<string, []>("op_650"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_1_cast_fp16 = softmax(axis = var_650, x = scores_3_cast_fp16)[name = tensor<string, []>("probs_1_cast_fp16")];
            tensor<bool, []> var_653_transpose_x_0 = const()[name = tensor<string, []>("op_653_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_653_transpose_y_0 = const()[name = tensor<string, []>("op_653_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_1_cast_fp16 = transpose(perm = var_627, x = sa_v_out_1_cast_fp16)[name = tensor<string, []>("transpose_261")];
            tensor<fp16, [1, 12, 1, 64]> var_653_cast_fp16 = matmul(transpose_x = var_653_transpose_x_0, transpose_y = var_653_transpose_y_0, x = probs_1_cast_fp16, y = v_t_1_cast_fp16)[name = tensor<string, []>("op_653_cast_fp16")];
            tensor<int32, [4]> var_658 = const()[name = tensor<string, []>("op_658"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_663 = const()[name = tensor<string, []>("op_663"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_659_cast_fp16 = transpose(perm = var_658, x = var_653_cast_fp16)[name = tensor<string, []>("transpose_260")];
            tensor<fp16, [1, 1, 768]> input_7_cast_fp16 = reshape(shape = var_663, x = var_659_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_0_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_0_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3363392))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3954112))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [768]> linear_1_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_1_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3955712)))];
            tensor<fp16, [1, 1, 768]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_0_self_attention_o_net_weight_to_fp16_quantized, x = input_7_cast_fp16)[name = tensor<string, []>("linear_1_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_9_cast_fp16 = add(x = input_3_cast_fp16, y = linear_1_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
            tensor<int32, [1]> input_11_axes_0 = const()[name = tensor<string, []>("input_11_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_0_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_0_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3957312)))];
            tensor<fp16, []> var_671_to_fp16 = const()[name = tensor<string, []>("op_671_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_11_cast_fp16 = layer_norm(axes = input_11_axes_0, epsilon = var_671_to_fp16, gamma = dec_layers_0_norm_xattn_query_weight_to_fp16, x = input_9_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3958912))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057472))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [128]> linear_2_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_2_bias_0_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057792)))];
            tensor<fp16, [1, 1, 128]> linear_2_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized, x = input_11_cast_fp16)[name = tensor<string, []>("linear_2_cast_fp16")];
            tensor<int32, [4]> var_684 = const()[name = tensor<string, []>("op_684"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_1_cast_fp16 = reshape(shape = var_684, x = linear_2_cast_fp16)[name = tensor<string, []>("xq_proj_1_cast_fp16")];
            tensor<int32, [4]> var_702 = const()[name = tensor<string, []>("op_702"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_0_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_0_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_704_transpose_x_0 = const()[name = tensor<string, []>("op_704_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_704_transpose_y_0 = const()[name = tensor<string, []>("op_704_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_0_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_0_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_98_perm_0 = const()[name = tensor<string, []>("transpose_98_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_99_perm_0 = const()[name = tensor<string, []>("transpose_99_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_0_to_fp16 = cast(dtype = xa_k_0_to_fp16_dtype_0, x = xa_k_0)[name = tensor<string, []>("cast_73")];
            tensor<fp16, [1, 1, 128, 256]> transpose_99 = transpose(perm = transpose_99_perm_0, x = xa_k_0_to_fp16)[name = tensor<string, []>("transpose_258")];
            tensor<fp16, [1, 1, 1, 128]> transpose_98 = transpose(perm = transpose_98_perm_0, x = xq_proj_1_cast_fp16)[name = tensor<string, []>("transpose_259")];
            tensor<fp16, [1, 1, 1, 256]> var_704_cast_fp16 = matmul(transpose_x = var_704_transpose_x_0, transpose_y = var_704_transpose_y_0, x = transpose_98, y = transpose_99)[name = tensor<string, []>("op_704_cast_fp16")];
            tensor<fp16, []> var_705_to_fp16 = const()[name = tensor<string, []>("op_705_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_1_cast_fp16 = mul(x = var_704_cast_fp16, y = var_705_to_fp16)[name = tensor<string, []>("xscores_1_cast_fp16")];
            tensor<int32, [1]> var_713_axes_0 = const()[name = tensor<string, []>("op_713_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<string, []> encoder_mask_to_fp16_dtype_0 = const()[name = tensor<string, []>("encoder_mask_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 256]> encoder_mask_to_fp16 = cast(dtype = encoder_mask_to_fp16_dtype_0, x = encoder_mask)[name = tensor<string, []>("cast_72")];
            tensor<fp16, [1, 1, 256]> var_713_cast_fp16 = expand_dims(axes = var_713_axes_0, x = encoder_mask_to_fp16)[name = tensor<string, []>("op_713_cast_fp16")];
            tensor<int32, [1]> var_715_axes_0 = const()[name = tensor<string, []>("op_715_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 1, 1, 256]> var_715_cast_fp16 = expand_dims(axes = var_715_axes_0, x = var_713_cast_fp16)[name = tensor<string, []>("op_715_cast_fp16")];
            tensor<fp16, []> var_721_promoted_to_fp16 = const()[name = tensor<string, []>("op_721_promoted_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
            tensor<bool, [1, 1, 1, 256]> var_722_cast_fp16 = equal(x = var_715_cast_fp16, y = var_721_promoted_to_fp16)[name = tensor<string, []>("op_722_cast_fp16")];
            tensor<fp16, []> var_723_to_fp16 = const()[name = tensor<string, []>("op_723_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_3_cast_fp16 = select(a = var_723_to_fp16, b = xscores_1_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_3_cast_fp16")];
            tensor<int32, []> var_725 = const()[name = tensor<string, []>("op_725"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_1_cast_fp16 = softmax(axis = var_725, x = xscores_3_cast_fp16)[name = tensor<string, []>("xprobs_1_cast_fp16")];
            tensor<bool, []> var_728_transpose_x_0 = const()[name = tensor<string, []>("op_728_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_728_transpose_y_0 = const()[name = tensor<string, []>("op_728_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_0_to_fp16 = cast(dtype = xa_v_0_to_fp16_dtype_0, x = xa_v_0)[name = tensor<string, []>("cast_71")];
            tensor<fp16, [1, 1, 256, 128]> xvT_1_cast_fp16 = transpose(perm = var_702, x = xa_v_0_to_fp16)[name = tensor<string, []>("transpose_257")];
            tensor<fp16, [1, 1, 1, 128]> var_728_cast_fp16 = matmul(transpose_x = var_728_transpose_x_0, transpose_y = var_728_transpose_y_0, x = xprobs_1_cast_fp16, y = xvT_1_cast_fp16)[name = tensor<string, []>("op_728_cast_fp16")];
            tensor<int32, [4]> var_733 = const()[name = tensor<string, []>("op_733"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_738 = const()[name = tensor<string, []>("op_738"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_734_cast_fp16 = transpose(perm = var_733, x = var_728_cast_fp16)[name = tensor<string, []>("transpose_256")];
            tensor<fp16, [1, 1, 128]> input_13_cast_fp16 = reshape(shape = var_738, x = var_734_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4058112))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4156480))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_3_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized, x = input_13_cast_fp16)[name = tensor<string, []>("linear_3_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_15_cast_fp16 = add(x = input_9_cast_fp16, y = linear_3_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
            tensor<int32, [1]> x_1_axes_0 = const()[name = tensor<string, []>("x_1_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_0_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_0_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4158080)))];
            tensor<fp16, []> var_746_to_fp16 = const()[name = tensor<string, []>("op_746_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_1_cast_fp16 = layer_norm(axes = x_1_axes_0, epsilon = var_746_to_fp16, gamma = dec_layers_0_norm_pos_ff_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("x_1_cast_fp16")];
            tensor<int32, [3]> var_762 = const()[name = tensor<string, []>("op_762"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_1_pad_type_0 = const()[name = tensor<string, []>("y_1_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_1_strides_0 = const()[name = tensor<string, []>("y_1_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_1_pad_0 = const()[name = tensor<string, []>("y_1_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_1_dilations_0 = const()[name = tensor<string, []>("y_1_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_1_groups_0 = const()[name = tensor<string, []>("y_1_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_0_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_0_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4159680))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6522176))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_3_cast_fp16 = transpose(perm = var_762, x = x_1_cast_fp16)[name = tensor<string, []>("transpose_255")];
            tensor<fp16, [1, 3072, 1]> y_1_cast_fp16 = conv(dilations = y_1_dilations_0, groups = y_1_groups_0, pad = y_1_pad_0, pad_type = y_1_pad_type_0, strides = y_1_strides_0, weight = dec_layers_0_pos_ff_proj_weight_to_fp16_quantized, x = x_3_cast_fp16)[name = tensor<string, []>("y_1_cast_fp16")];
            tensor<string, []> x_5_mode_0 = const()[name = tensor<string, []>("x_5_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_5_cast_fp16 = gelu(mode = x_5_mode_0, x = y_1_cast_fp16)[name = tensor<string, []>("x_5_cast_fp16")];
            tensor<string, []> y_3_pad_type_0 = const()[name = tensor<string, []>("y_3_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_3_strides_0 = const()[name = tensor<string, []>("y_3_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_3_pad_0 = const()[name = tensor<string, []>("y_3_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_3_dilations_0 = const()[name = tensor<string, []>("y_3_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_3_groups_0 = const()[name = tensor<string, []>("y_3_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6528384))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8887744))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_3_cast_fp16 = conv(dilations = y_3_dilations_0, groups = y_3_groups_0, pad = y_3_pad_0, pad_type = y_3_pad_type_0, strides = y_3_strides_0, weight = dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized, x = x_5_cast_fp16)[name = tensor<string, []>("y_3_cast_fp16")];
            tensor<int32, [3]> var_780 = const()[name = tensor<string, []>("op_780"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_781_cast_fp16 = transpose(perm = var_780, x = y_3_cast_fp16)[name = tensor<string, []>("transpose_254")];
            tensor<fp16, [1, 1, 768]> input_17_cast_fp16 = add(x = input_15_cast_fp16, y = var_781_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
            tensor<int32, [1]> input_19_axes_0 = const()[name = tensor<string, []>("input_19_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_1_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_1_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8889344)))];
            tensor<fp16, []> var_785_to_fp16 = const()[name = tensor<string, []>("op_785_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_19_cast_fp16 = layer_norm(axes = input_19_axes_0, epsilon = var_785_to_fp16, gamma = dec_layers_1_norm_self_weight_to_fp16, x = input_17_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8890944))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10660480))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized, x = input_19_cast_fp16)[name = tensor<string, []>("linear_4_cast_fp16")];
            tensor<int32, [5]> var_799 = const()[name = tensor<string, []>("op_799"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_7_cast_fp16 = reshape(shape = var_799, x = linear_4_cast_fp16)[name = tensor<string, []>("qkv_7_cast_fp16")];
            tensor<int32, [5]> q_3_begin_0 = const()[name = tensor<string, []>("q_3_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_3_end_0 = const()[name = tensor<string, []>("q_3_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_3_end_mask_0 = const()[name = tensor<string, []>("q_3_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_3_squeeze_mask_0 = const()[name = tensor<string, []>("q_3_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_3_cast_fp16 = slice_by_index(begin = q_3_begin_0, end = q_3_end_0, end_mask = q_3_end_mask_0, squeeze_mask = q_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor<string, []>("q_3_cast_fp16")];
            tensor<int32, [5]> new_k_3_begin_0 = const()[name = tensor<string, []>("new_k_3_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_3_end_0 = const()[name = tensor<string, []>("new_k_3_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_3_end_mask_0 = const()[name = tensor<string, []>("new_k_3_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_3_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_3_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_3_cast_fp16 = slice_by_index(begin = new_k_3_begin_0, end = new_k_3_end_0, end_mask = new_k_3_end_mask_0, squeeze_mask = new_k_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor<string, []>("new_k_3_cast_fp16")];
            tensor<int32, [5]> new_v_3_begin_0 = const()[name = tensor<string, []>("new_v_3_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_3_end_0 = const()[name = tensor<string, []>("new_v_3_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_3_end_mask_0 = const()[name = tensor<string, []>("new_v_3_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_3_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_3_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_3_cast_fp16 = slice_by_index(begin = new_v_3_begin_0, end = new_v_3_end_0, end_mask = new_v_3_end_mask_0, squeeze_mask = new_v_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor<string, []>("new_v_3_cast_fp16")];
            tensor<string, []> sa_k_in_1_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_1_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_1_to_fp16 = cast(dtype = sa_k_in_1_to_fp16_dtype_0, x = sa_k_in_1)[name = tensor<string, []>("cast_70")];
            tensor<fp16, [1, 600, 12, 64]> var_860_cast_fp16 = mul(x = sa_k_in_1_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_860_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_861_cast_fp16 = mul(x = new_k_3_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_861_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_3_cast_fp16 = add(x = var_860_cast_fp16, y = var_861_cast_fp16)[name = tensor<string, []>("sa_k_out_3_cast_fp16")];
            tensor<string, []> sa_k_out_3_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_3_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_1_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_1_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_1_to_fp16 = cast(dtype = sa_v_in_1_to_fp16_dtype_0, x = sa_v_in_1)[name = tensor<string, []>("cast_69")];
            tensor<fp16, [1, 600, 12, 64]> var_867_cast_fp16 = mul(x = sa_v_in_1_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_867_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_868_cast_fp16 = mul(x = new_v_3_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_868_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_3_cast_fp16 = add(x = var_867_cast_fp16, y = var_868_cast_fp16)[name = tensor<string, []>("sa_v_out_3_cast_fp16")];
            tensor<string, []> sa_v_out_3_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_3_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_887 = const()[name = tensor<string, []>("op_887"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_889_transpose_x_0 = const()[name = tensor<string, []>("op_889_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_889_transpose_y_0 = const()[name = tensor<string, []>("op_889_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_100_perm_0 = const()[name = tensor<string, []>("transpose_100_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_101_perm_0 = const()[name = tensor<string, []>("transpose_101_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_101 = transpose(perm = transpose_101_perm_0, x = sa_k_out_3_cast_fp16)[name = tensor<string, []>("transpose_252")];
            tensor<fp16, [1, 12, 1, 64]> transpose_100 = transpose(perm = transpose_100_perm_0, x = q_3_cast_fp16)[name = tensor<string, []>("transpose_253")];
            tensor<fp16, [1, 12, 1, 600]> var_889_cast_fp16 = matmul(transpose_x = var_889_transpose_x_0, transpose_y = var_889_transpose_y_0, x = transpose_100, y = transpose_101)[name = tensor<string, []>("op_889_cast_fp16")];
            tensor<fp16, []> var_890_to_fp16 = const()[name = tensor<string, []>("op_890_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_5_cast_fp16 = mul(x = var_889_cast_fp16, y = var_890_to_fp16)[name = tensor<string, []>("scores_5_cast_fp16")];
            tensor<fp16, []> var_908_to_fp16 = const()[name = tensor<string, []>("op_908_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_7_cast_fp16 = select(a = var_908_to_fp16, b = scores_5_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_7_cast_fp16")];
            tensor<int32, []> var_910 = const()[name = tensor<string, []>("op_910"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_3_cast_fp16 = softmax(axis = var_910, x = scores_7_cast_fp16)[name = tensor<string, []>("probs_3_cast_fp16")];
            tensor<bool, []> var_913_transpose_x_0 = const()[name = tensor<string, []>("op_913_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_913_transpose_y_0 = const()[name = tensor<string, []>("op_913_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_3_cast_fp16 = transpose(perm = var_887, x = sa_v_out_3_cast_fp16)[name = tensor<string, []>("transpose_251")];
            tensor<fp16, [1, 12, 1, 64]> var_913_cast_fp16 = matmul(transpose_x = var_913_transpose_x_0, transpose_y = var_913_transpose_y_0, x = probs_3_cast_fp16, y = v_t_3_cast_fp16)[name = tensor<string, []>("op_913_cast_fp16")];
            tensor<int32, [4]> var_918 = const()[name = tensor<string, []>("op_918"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_923 = const()[name = tensor<string, []>("op_923"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_919_cast_fp16 = transpose(perm = var_918, x = var_913_cast_fp16)[name = tensor<string, []>("transpose_250")];
            tensor<fp16, [1, 1, 768]> input_21_cast_fp16 = reshape(shape = var_923, x = var_919_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_1_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_1_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10665152))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11255040))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_5_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_1_self_attention_o_net_weight_to_fp16_quantized, x = input_21_cast_fp16)[name = tensor<string, []>("linear_5_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_23_cast_fp16 = add(x = input_17_cast_fp16, y = linear_5_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
            tensor<int32, [1]> input_25_axes_0 = const()[name = tensor<string, []>("input_25_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_1_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_1_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11256640)))];
            tensor<fp16, []> var_931_to_fp16 = const()[name = tensor<string, []>("op_931_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_25_cast_fp16 = layer_norm(axes = input_25_axes_0, epsilon = var_931_to_fp16, gamma = dec_layers_1_norm_xattn_query_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("input_25_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11258240))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11356608))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_6_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized, x = input_25_cast_fp16)[name = tensor<string, []>("linear_6_cast_fp16")];
            tensor<int32, [4]> var_944 = const()[name = tensor<string, []>("op_944"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_3_cast_fp16 = reshape(shape = var_944, x = linear_6_cast_fp16)[name = tensor<string, []>("xq_proj_3_cast_fp16")];
            tensor<int32, [4]> var_962 = const()[name = tensor<string, []>("op_962"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_1_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_1_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_964_transpose_x_0 = const()[name = tensor<string, []>("op_964_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_964_transpose_y_0 = const()[name = tensor<string, []>("op_964_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_1_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_1_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_102_perm_0 = const()[name = tensor<string, []>("transpose_102_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_103_perm_0 = const()[name = tensor<string, []>("transpose_103_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_1_to_fp16 = cast(dtype = xa_k_1_to_fp16_dtype_0, x = xa_k_1)[name = tensor<string, []>("cast_68")];
            tensor<fp16, [1, 1, 128, 256]> transpose_103 = transpose(perm = transpose_103_perm_0, x = xa_k_1_to_fp16)[name = tensor<string, []>("transpose_248")];
            tensor<fp16, [1, 1, 1, 128]> transpose_102 = transpose(perm = transpose_102_perm_0, x = xq_proj_3_cast_fp16)[name = tensor<string, []>("transpose_249")];
            tensor<fp16, [1, 1, 1, 256]> var_964_cast_fp16 = matmul(transpose_x = var_964_transpose_x_0, transpose_y = var_964_transpose_y_0, x = transpose_102, y = transpose_103)[name = tensor<string, []>("op_964_cast_fp16")];
            tensor<fp16, []> var_965_to_fp16 = const()[name = tensor<string, []>("op_965_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_5_cast_fp16 = mul(x = var_964_cast_fp16, y = var_965_to_fp16)[name = tensor<string, []>("xscores_5_cast_fp16")];
            tensor<fp16, []> var_983_to_fp16 = const()[name = tensor<string, []>("op_983_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_7_cast_fp16 = select(a = var_983_to_fp16, b = xscores_5_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_7_cast_fp16")];
            tensor<int32, []> var_985 = const()[name = tensor<string, []>("op_985"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_3_cast_fp16 = softmax(axis = var_985, x = xscores_7_cast_fp16)[name = tensor<string, []>("xprobs_3_cast_fp16")];
            tensor<bool, []> var_988_transpose_x_0 = const()[name = tensor<string, []>("op_988_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_988_transpose_y_0 = const()[name = tensor<string, []>("op_988_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_1_to_fp16 = cast(dtype = xa_v_1_to_fp16_dtype_0, x = xa_v_1)[name = tensor<string, []>("cast_67")];
            tensor<fp16, [1, 1, 256, 128]> xvT_3_cast_fp16 = transpose(perm = var_962, x = xa_v_1_to_fp16)[name = tensor<string, []>("transpose_247")];
            tensor<fp16, [1, 1, 1, 128]> var_988_cast_fp16 = matmul(transpose_x = var_988_transpose_x_0, transpose_y = var_988_transpose_y_0, x = xprobs_3_cast_fp16, y = xvT_3_cast_fp16)[name = tensor<string, []>("op_988_cast_fp16")];
            tensor<int32, [4]> var_993 = const()[name = tensor<string, []>("op_993"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_998 = const()[name = tensor<string, []>("op_998"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_994_cast_fp16 = transpose(perm = var_993, x = var_988_cast_fp16)[name = tensor<string, []>("transpose_246")];
            tensor<fp16, [1, 1, 128]> input_27_cast_fp16 = reshape(shape = var_998, x = var_994_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11356928))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11455296))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized, x = input_27_cast_fp16)[name = tensor<string, []>("linear_7_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_29_cast_fp16 = add(x = input_23_cast_fp16, y = linear_7_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
            tensor<int32, [1]> x_9_axes_0 = const()[name = tensor<string, []>("x_9_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_1_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_1_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11456896)))];
            tensor<fp16, []> var_1006_to_fp16 = const()[name = tensor<string, []>("op_1006_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_9_cast_fp16 = layer_norm(axes = x_9_axes_0, epsilon = var_1006_to_fp16, gamma = dec_layers_1_norm_pos_ff_weight_to_fp16, x = input_29_cast_fp16)[name = tensor<string, []>("x_9_cast_fp16")];
            tensor<int32, [3]> var_1022 = const()[name = tensor<string, []>("op_1022"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_5_pad_type_0 = const()[name = tensor<string, []>("y_5_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_5_strides_0 = const()[name = tensor<string, []>("y_5_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_5_pad_0 = const()[name = tensor<string, []>("y_5_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_5_dilations_0 = const()[name = tensor<string, []>("y_5_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_5_groups_0 = const()[name = tensor<string, []>("y_5_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_1_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_1_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11458496))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13817856))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_11_cast_fp16 = transpose(perm = var_1022, x = x_9_cast_fp16)[name = tensor<string, []>("transpose_245")];
            tensor<fp16, [1, 3072, 1]> y_5_cast_fp16 = conv(dilations = y_5_dilations_0, groups = y_5_groups_0, pad = y_5_pad_0, pad_type = y_5_pad_type_0, strides = y_5_strides_0, weight = dec_layers_1_pos_ff_proj_weight_to_fp16_quantized, x = x_11_cast_fp16)[name = tensor<string, []>("y_5_cast_fp16")];
            tensor<string, []> x_13_mode_0 = const()[name = tensor<string, []>("x_13_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_13_cast_fp16 = gelu(mode = x_13_mode_0, x = y_5_cast_fp16)[name = tensor<string, []>("x_13_cast_fp16")];
            tensor<string, []> y_7_pad_type_0 = const()[name = tensor<string, []>("y_7_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_7_strides_0 = const()[name = tensor<string, []>("y_7_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_7_pad_0 = const()[name = tensor<string, []>("y_7_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_7_dilations_0 = const()[name = tensor<string, []>("y_7_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_7_groups_0 = const()[name = tensor<string, []>("y_7_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13824064))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16183424))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_7_cast_fp16 = conv(dilations = y_7_dilations_0, groups = y_7_groups_0, pad = y_7_pad_0, pad_type = y_7_pad_type_0, strides = y_7_strides_0, weight = dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized, x = x_13_cast_fp16)[name = tensor<string, []>("y_7_cast_fp16")];
            tensor<int32, [3]> var_1040 = const()[name = tensor<string, []>("op_1040"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_1041_cast_fp16 = transpose(perm = var_1040, x = y_7_cast_fp16)[name = tensor<string, []>("transpose_244")];
            tensor<fp16, [1, 1, 768]> input_31_cast_fp16 = add(x = input_29_cast_fp16, y = var_1041_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
            tensor<int32, [1]> input_33_axes_0 = const()[name = tensor<string, []>("input_33_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_2_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_2_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16185024)))];
            tensor<fp16, []> var_1045_to_fp16 = const()[name = tensor<string, []>("op_1045_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, epsilon = var_1045_to_fp16, gamma = dec_layers_2_norm_self_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16186624))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17956160))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized, x = input_33_cast_fp16)[name = tensor<string, []>("linear_8_cast_fp16")];
            tensor<int32, [5]> var_1059 = const()[name = tensor<string, []>("op_1059"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_11_cast_fp16 = reshape(shape = var_1059, x = linear_8_cast_fp16)[name = tensor<string, []>("qkv_11_cast_fp16")];
            tensor<int32, [5]> q_5_begin_0 = const()[name = tensor<string, []>("q_5_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_5_end_0 = const()[name = tensor<string, []>("q_5_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_5_end_mask_0 = const()[name = tensor<string, []>("q_5_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_5_squeeze_mask_0 = const()[name = tensor<string, []>("q_5_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_5_cast_fp16 = slice_by_index(begin = q_5_begin_0, end = q_5_end_0, end_mask = q_5_end_mask_0, squeeze_mask = q_5_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor<string, []>("q_5_cast_fp16")];
            tensor<int32, [5]> new_k_5_begin_0 = const()[name = tensor<string, []>("new_k_5_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_5_end_0 = const()[name = tensor<string, []>("new_k_5_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_5_end_mask_0 = const()[name = tensor<string, []>("new_k_5_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_5_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_5_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_5_cast_fp16 = slice_by_index(begin = new_k_5_begin_0, end = new_k_5_end_0, end_mask = new_k_5_end_mask_0, squeeze_mask = new_k_5_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor<string, []>("new_k_5_cast_fp16")];
            tensor<int32, [5]> new_v_5_begin_0 = const()[name = tensor<string, []>("new_v_5_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_5_end_0 = const()[name = tensor<string, []>("new_v_5_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_5_end_mask_0 = const()[name = tensor<string, []>("new_v_5_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_5_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_5_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_5_cast_fp16 = slice_by_index(begin = new_v_5_begin_0, end = new_v_5_end_0, end_mask = new_v_5_end_mask_0, squeeze_mask = new_v_5_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor<string, []>("new_v_5_cast_fp16")];
            tensor<string, []> sa_k_in_2_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_2_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_2_to_fp16 = cast(dtype = sa_k_in_2_to_fp16_dtype_0, x = sa_k_in_2)[name = tensor<string, []>("cast_66")];
            tensor<fp16, [1, 600, 12, 64]> var_1120_cast_fp16 = mul(x = sa_k_in_2_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_1120_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_1121_cast_fp16 = mul(x = new_k_5_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_1121_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_5_cast_fp16 = add(x = var_1120_cast_fp16, y = var_1121_cast_fp16)[name = tensor<string, []>("sa_k_out_5_cast_fp16")];
            tensor<string, []> sa_k_out_5_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_5_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_2_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_2_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_2_to_fp16 = cast(dtype = sa_v_in_2_to_fp16_dtype_0, x = sa_v_in_2)[name = tensor<string, []>("cast_65")];
            tensor<fp16, [1, 600, 12, 64]> var_1127_cast_fp16 = mul(x = sa_v_in_2_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_1127_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_1128_cast_fp16 = mul(x = new_v_5_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_1128_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_5_cast_fp16 = add(x = var_1127_cast_fp16, y = var_1128_cast_fp16)[name = tensor<string, []>("sa_v_out_5_cast_fp16")];
            tensor<string, []> sa_v_out_5_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_5_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_1147 = const()[name = tensor<string, []>("op_1147"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_1149_transpose_x_0 = const()[name = tensor<string, []>("op_1149_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1149_transpose_y_0 = const()[name = tensor<string, []>("op_1149_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_104_perm_0 = const()[name = tensor<string, []>("transpose_104_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_105_perm_0 = const()[name = tensor<string, []>("transpose_105_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_105 = transpose(perm = transpose_105_perm_0, x = sa_k_out_5_cast_fp16)[name = tensor<string, []>("transpose_242")];
            tensor<fp16, [1, 12, 1, 64]> transpose_104 = transpose(perm = transpose_104_perm_0, x = q_5_cast_fp16)[name = tensor<string, []>("transpose_243")];
            tensor<fp16, [1, 12, 1, 600]> var_1149_cast_fp16 = matmul(transpose_x = var_1149_transpose_x_0, transpose_y = var_1149_transpose_y_0, x = transpose_104, y = transpose_105)[name = tensor<string, []>("op_1149_cast_fp16")];
            tensor<fp16, []> var_1150_to_fp16 = const()[name = tensor<string, []>("op_1150_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_9_cast_fp16 = mul(x = var_1149_cast_fp16, y = var_1150_to_fp16)[name = tensor<string, []>("scores_9_cast_fp16")];
            tensor<fp16, []> var_1168_to_fp16 = const()[name = tensor<string, []>("op_1168_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_11_cast_fp16 = select(a = var_1168_to_fp16, b = scores_9_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_11_cast_fp16")];
            tensor<int32, []> var_1170 = const()[name = tensor<string, []>("op_1170"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_5_cast_fp16 = softmax(axis = var_1170, x = scores_11_cast_fp16)[name = tensor<string, []>("probs_5_cast_fp16")];
            tensor<bool, []> var_1173_transpose_x_0 = const()[name = tensor<string, []>("op_1173_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1173_transpose_y_0 = const()[name = tensor<string, []>("op_1173_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_5_cast_fp16 = transpose(perm = var_1147, x = sa_v_out_5_cast_fp16)[name = tensor<string, []>("transpose_241")];
            tensor<fp16, [1, 12, 1, 64]> var_1173_cast_fp16 = matmul(transpose_x = var_1173_transpose_x_0, transpose_y = var_1173_transpose_y_0, x = probs_5_cast_fp16, y = v_t_5_cast_fp16)[name = tensor<string, []>("op_1173_cast_fp16")];
            tensor<int32, [4]> var_1178 = const()[name = tensor<string, []>("op_1178"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1183 = const()[name = tensor<string, []>("op_1183"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_1179_cast_fp16 = transpose(perm = var_1178, x = var_1173_cast_fp16)[name = tensor<string, []>("transpose_240")];
            tensor<fp16, [1, 1, 768]> input_35_cast_fp16 = reshape(shape = var_1183, x = var_1179_cast_fp16)[name = tensor<string, []>("input_35_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_2_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_2_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17960832))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18550720))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_2_self_attention_o_net_weight_to_fp16_quantized, x = input_35_cast_fp16)[name = tensor<string, []>("linear_9_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_37_cast_fp16 = add(x = input_31_cast_fp16, y = linear_9_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
            tensor<int32, [1]> input_39_axes_0 = const()[name = tensor<string, []>("input_39_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_2_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_2_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18552320)))];
            tensor<fp16, []> var_1191_to_fp16 = const()[name = tensor<string, []>("op_1191_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_39_cast_fp16 = layer_norm(axes = input_39_axes_0, epsilon = var_1191_to_fp16, gamma = dec_layers_2_norm_xattn_query_weight_to_fp16, x = input_37_cast_fp16)[name = tensor<string, []>("input_39_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18553920))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18652288))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_10_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized, x = input_39_cast_fp16)[name = tensor<string, []>("linear_10_cast_fp16")];
            tensor<int32, [4]> var_1204 = const()[name = tensor<string, []>("op_1204"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_5_cast_fp16 = reshape(shape = var_1204, x = linear_10_cast_fp16)[name = tensor<string, []>("xq_proj_5_cast_fp16")];
            tensor<int32, [4]> var_1222 = const()[name = tensor<string, []>("op_1222"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_2_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_2_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_1224_transpose_x_0 = const()[name = tensor<string, []>("op_1224_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1224_transpose_y_0 = const()[name = tensor<string, []>("op_1224_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_2_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_2_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_106_perm_0 = const()[name = tensor<string, []>("transpose_106_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_107_perm_0 = const()[name = tensor<string, []>("transpose_107_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_2_to_fp16 = cast(dtype = xa_k_2_to_fp16_dtype_0, x = xa_k_2)[name = tensor<string, []>("cast_64")];
            tensor<fp16, [1, 1, 128, 256]> transpose_107 = transpose(perm = transpose_107_perm_0, x = xa_k_2_to_fp16)[name = tensor<string, []>("transpose_238")];
            tensor<fp16, [1, 1, 1, 128]> transpose_106 = transpose(perm = transpose_106_perm_0, x = xq_proj_5_cast_fp16)[name = tensor<string, []>("transpose_239")];
            tensor<fp16, [1, 1, 1, 256]> var_1224_cast_fp16 = matmul(transpose_x = var_1224_transpose_x_0, transpose_y = var_1224_transpose_y_0, x = transpose_106, y = transpose_107)[name = tensor<string, []>("op_1224_cast_fp16")];
            tensor<fp16, []> var_1225_to_fp16 = const()[name = tensor<string, []>("op_1225_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_9_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor<string, []>("xscores_9_cast_fp16")];
            tensor<fp16, []> var_1243_to_fp16 = const()[name = tensor<string, []>("op_1243_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_11_cast_fp16 = select(a = var_1243_to_fp16, b = xscores_9_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_11_cast_fp16")];
            tensor<int32, []> var_1245 = const()[name = tensor<string, []>("op_1245"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_5_cast_fp16 = softmax(axis = var_1245, x = xscores_11_cast_fp16)[name = tensor<string, []>("xprobs_5_cast_fp16")];
            tensor<bool, []> var_1248_transpose_x_0 = const()[name = tensor<string, []>("op_1248_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1248_transpose_y_0 = const()[name = tensor<string, []>("op_1248_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_2_to_fp16 = cast(dtype = xa_v_2_to_fp16_dtype_0, x = xa_v_2)[name = tensor<string, []>("cast_63")];
            tensor<fp16, [1, 1, 256, 128]> xvT_5_cast_fp16 = transpose(perm = var_1222, x = xa_v_2_to_fp16)[name = tensor<string, []>("transpose_237")];
            tensor<fp16, [1, 1, 1, 128]> var_1248_cast_fp16 = matmul(transpose_x = var_1248_transpose_x_0, transpose_y = var_1248_transpose_y_0, x = xprobs_5_cast_fp16, y = xvT_5_cast_fp16)[name = tensor<string, []>("op_1248_cast_fp16")];
            tensor<int32, [4]> var_1253 = const()[name = tensor<string, []>("op_1253"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1258 = const()[name = tensor<string, []>("op_1258"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_1254_cast_fp16 = transpose(perm = var_1253, x = var_1248_cast_fp16)[name = tensor<string, []>("transpose_236")];
            tensor<fp16, [1, 1, 128]> input_41_cast_fp16 = reshape(shape = var_1258, x = var_1254_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18652608))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18750976))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_11_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized, x = input_41_cast_fp16)[name = tensor<string, []>("linear_11_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_43_cast_fp16 = add(x = input_37_cast_fp16, y = linear_11_cast_fp16)[name = tensor<string, []>("input_43_cast_fp16")];
            tensor<int32, [1]> x_17_axes_0 = const()[name = tensor<string, []>("x_17_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_2_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_2_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18752576)))];
            tensor<fp16, []> var_1266_to_fp16 = const()[name = tensor<string, []>("op_1266_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_17_cast_fp16 = layer_norm(axes = x_17_axes_0, epsilon = var_1266_to_fp16, gamma = dec_layers_2_norm_pos_ff_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("x_17_cast_fp16")];
            tensor<int32, [3]> var_1282 = const()[name = tensor<string, []>("op_1282"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_9_pad_type_0 = const()[name = tensor<string, []>("y_9_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_9_strides_0 = const()[name = tensor<string, []>("y_9_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_9_pad_0 = const()[name = tensor<string, []>("y_9_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_9_dilations_0 = const()[name = tensor<string, []>("y_9_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_9_groups_0 = const()[name = tensor<string, []>("y_9_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_2_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_2_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18754176))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21113536))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_19_cast_fp16 = transpose(perm = var_1282, x = x_17_cast_fp16)[name = tensor<string, []>("transpose_235")];
            tensor<fp16, [1, 3072, 1]> y_9_cast_fp16 = conv(dilations = y_9_dilations_0, groups = y_9_groups_0, pad = y_9_pad_0, pad_type = y_9_pad_type_0, strides = y_9_strides_0, weight = dec_layers_2_pos_ff_proj_weight_to_fp16_quantized, x = x_19_cast_fp16)[name = tensor<string, []>("y_9_cast_fp16")];
            tensor<string, []> x_21_mode_0 = const()[name = tensor<string, []>("x_21_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_21_cast_fp16 = gelu(mode = x_21_mode_0, x = y_9_cast_fp16)[name = tensor<string, []>("x_21_cast_fp16")];
            tensor<string, []> y_11_pad_type_0 = const()[name = tensor<string, []>("y_11_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_11_strides_0 = const()[name = tensor<string, []>("y_11_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_11_pad_0 = const()[name = tensor<string, []>("y_11_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_11_dilations_0 = const()[name = tensor<string, []>("y_11_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_11_groups_0 = const()[name = tensor<string, []>("y_11_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21119744))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23479104))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_11_cast_fp16 = conv(dilations = y_11_dilations_0, groups = y_11_groups_0, pad = y_11_pad_0, pad_type = y_11_pad_type_0, strides = y_11_strides_0, weight = dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized, x = x_21_cast_fp16)[name = tensor<string, []>("y_11_cast_fp16")];
            tensor<int32, [3]> var_1300 = const()[name = tensor<string, []>("op_1300"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_1301_cast_fp16 = transpose(perm = var_1300, x = y_11_cast_fp16)[name = tensor<string, []>("transpose_234")];
            tensor<fp16, [1, 1, 768]> input_45_cast_fp16 = add(x = input_43_cast_fp16, y = var_1301_cast_fp16)[name = tensor<string, []>("input_45_cast_fp16")];
            tensor<int32, [1]> input_47_axes_0 = const()[name = tensor<string, []>("input_47_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_3_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_3_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23480704)))];
            tensor<fp16, []> var_1305_to_fp16 = const()[name = tensor<string, []>("op_1305_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_47_cast_fp16 = layer_norm(axes = input_47_axes_0, epsilon = var_1305_to_fp16, gamma = dec_layers_3_norm_self_weight_to_fp16, x = input_45_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23482304))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25251840))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_12_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized, x = input_47_cast_fp16)[name = tensor<string, []>("linear_12_cast_fp16")];
            tensor<int32, [5]> var_1319 = const()[name = tensor<string, []>("op_1319"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_15_cast_fp16 = reshape(shape = var_1319, x = linear_12_cast_fp16)[name = tensor<string, []>("qkv_15_cast_fp16")];
            tensor<int32, [5]> q_7_begin_0 = const()[name = tensor<string, []>("q_7_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_7_end_0 = const()[name = tensor<string, []>("q_7_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_7_end_mask_0 = const()[name = tensor<string, []>("q_7_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_7_squeeze_mask_0 = const()[name = tensor<string, []>("q_7_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_7_cast_fp16 = slice_by_index(begin = q_7_begin_0, end = q_7_end_0, end_mask = q_7_end_mask_0, squeeze_mask = q_7_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor<string, []>("q_7_cast_fp16")];
            tensor<int32, [5]> new_k_7_begin_0 = const()[name = tensor<string, []>("new_k_7_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_7_end_0 = const()[name = tensor<string, []>("new_k_7_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_7_end_mask_0 = const()[name = tensor<string, []>("new_k_7_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_7_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_7_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_7_cast_fp16 = slice_by_index(begin = new_k_7_begin_0, end = new_k_7_end_0, end_mask = new_k_7_end_mask_0, squeeze_mask = new_k_7_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor<string, []>("new_k_7_cast_fp16")];
            tensor<int32, [5]> new_v_7_begin_0 = const()[name = tensor<string, []>("new_v_7_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_7_end_0 = const()[name = tensor<string, []>("new_v_7_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_7_end_mask_0 = const()[name = tensor<string, []>("new_v_7_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_7_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_7_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_7_cast_fp16 = slice_by_index(begin = new_v_7_begin_0, end = new_v_7_end_0, end_mask = new_v_7_end_mask_0, squeeze_mask = new_v_7_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor<string, []>("new_v_7_cast_fp16")];
            tensor<string, []> sa_k_in_3_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_3_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_3_to_fp16 = cast(dtype = sa_k_in_3_to_fp16_dtype_0, x = sa_k_in_3)[name = tensor<string, []>("cast_62")];
            tensor<fp16, [1, 600, 12, 64]> var_1380_cast_fp16 = mul(x = sa_k_in_3_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_1380_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_1381_cast_fp16 = mul(x = new_k_7_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_1381_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_7_cast_fp16 = add(x = var_1380_cast_fp16, y = var_1381_cast_fp16)[name = tensor<string, []>("sa_k_out_7_cast_fp16")];
            tensor<string, []> sa_k_out_7_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_7_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_3_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_3_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_3_to_fp16 = cast(dtype = sa_v_in_3_to_fp16_dtype_0, x = sa_v_in_3)[name = tensor<string, []>("cast_61")];
            tensor<fp16, [1, 600, 12, 64]> var_1387_cast_fp16 = mul(x = sa_v_in_3_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_1387_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_1388_cast_fp16 = mul(x = new_v_7_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_1388_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_7_cast_fp16 = add(x = var_1387_cast_fp16, y = var_1388_cast_fp16)[name = tensor<string, []>("sa_v_out_7_cast_fp16")];
            tensor<string, []> sa_v_out_7_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_7_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_1407 = const()[name = tensor<string, []>("op_1407"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_1409_transpose_x_0 = const()[name = tensor<string, []>("op_1409_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1409_transpose_y_0 = const()[name = tensor<string, []>("op_1409_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_108_perm_0 = const()[name = tensor<string, []>("transpose_108_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_109_perm_0 = const()[name = tensor<string, []>("transpose_109_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_109 = transpose(perm = transpose_109_perm_0, x = sa_k_out_7_cast_fp16)[name = tensor<string, []>("transpose_232")];
            tensor<fp16, [1, 12, 1, 64]> transpose_108 = transpose(perm = transpose_108_perm_0, x = q_7_cast_fp16)[name = tensor<string, []>("transpose_233")];
            tensor<fp16, [1, 12, 1, 600]> var_1409_cast_fp16 = matmul(transpose_x = var_1409_transpose_x_0, transpose_y = var_1409_transpose_y_0, x = transpose_108, y = transpose_109)[name = tensor<string, []>("op_1409_cast_fp16")];
            tensor<fp16, []> var_1410_to_fp16 = const()[name = tensor<string, []>("op_1410_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_13_cast_fp16 = mul(x = var_1409_cast_fp16, y = var_1410_to_fp16)[name = tensor<string, []>("scores_13_cast_fp16")];
            tensor<fp16, []> var_1428_to_fp16 = const()[name = tensor<string, []>("op_1428_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_15_cast_fp16 = select(a = var_1428_to_fp16, b = scores_13_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_15_cast_fp16")];
            tensor<int32, []> var_1430 = const()[name = tensor<string, []>("op_1430"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_7_cast_fp16 = softmax(axis = var_1430, x = scores_15_cast_fp16)[name = tensor<string, []>("probs_7_cast_fp16")];
            tensor<bool, []> var_1433_transpose_x_0 = const()[name = tensor<string, []>("op_1433_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1433_transpose_y_0 = const()[name = tensor<string, []>("op_1433_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_7_cast_fp16 = transpose(perm = var_1407, x = sa_v_out_7_cast_fp16)[name = tensor<string, []>("transpose_231")];
            tensor<fp16, [1, 12, 1, 64]> var_1433_cast_fp16 = matmul(transpose_x = var_1433_transpose_x_0, transpose_y = var_1433_transpose_y_0, x = probs_7_cast_fp16, y = v_t_7_cast_fp16)[name = tensor<string, []>("op_1433_cast_fp16")];
            tensor<int32, [4]> var_1438 = const()[name = tensor<string, []>("op_1438"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1443 = const()[name = tensor<string, []>("op_1443"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_1439_cast_fp16 = transpose(perm = var_1438, x = var_1433_cast_fp16)[name = tensor<string, []>("transpose_230")];
            tensor<fp16, [1, 1, 768]> input_49_cast_fp16 = reshape(shape = var_1443, x = var_1439_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_3_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_3_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25256512))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25846400))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_3_self_attention_o_net_weight_to_fp16_quantized, x = input_49_cast_fp16)[name = tensor<string, []>("linear_13_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_51_cast_fp16 = add(x = input_45_cast_fp16, y = linear_13_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
            tensor<int32, [1]> input_53_axes_0 = const()[name = tensor<string, []>("input_53_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_3_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_3_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25848000)))];
            tensor<fp16, []> var_1451_to_fp16 = const()[name = tensor<string, []>("op_1451_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_53_cast_fp16 = layer_norm(axes = input_53_axes_0, epsilon = var_1451_to_fp16, gamma = dec_layers_3_norm_xattn_query_weight_to_fp16, x = input_51_cast_fp16)[name = tensor<string, []>("input_53_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25849600))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25947968))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_14_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized, x = input_53_cast_fp16)[name = tensor<string, []>("linear_14_cast_fp16")];
            tensor<int32, [4]> var_1464 = const()[name = tensor<string, []>("op_1464"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_7_cast_fp16 = reshape(shape = var_1464, x = linear_14_cast_fp16)[name = tensor<string, []>("xq_proj_7_cast_fp16")];
            tensor<int32, [4]> var_1482 = const()[name = tensor<string, []>("op_1482"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_3_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_3_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_1484_transpose_x_0 = const()[name = tensor<string, []>("op_1484_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1484_transpose_y_0 = const()[name = tensor<string, []>("op_1484_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_3_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_3_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_110_perm_0 = const()[name = tensor<string, []>("transpose_110_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_111_perm_0 = const()[name = tensor<string, []>("transpose_111_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_3_to_fp16 = cast(dtype = xa_k_3_to_fp16_dtype_0, x = xa_k_3)[name = tensor<string, []>("cast_60")];
            tensor<fp16, [1, 1, 128, 256]> transpose_111 = transpose(perm = transpose_111_perm_0, x = xa_k_3_to_fp16)[name = tensor<string, []>("transpose_228")];
            tensor<fp16, [1, 1, 1, 128]> transpose_110 = transpose(perm = transpose_110_perm_0, x = xq_proj_7_cast_fp16)[name = tensor<string, []>("transpose_229")];
            tensor<fp16, [1, 1, 1, 256]> var_1484_cast_fp16 = matmul(transpose_x = var_1484_transpose_x_0, transpose_y = var_1484_transpose_y_0, x = transpose_110, y = transpose_111)[name = tensor<string, []>("op_1484_cast_fp16")];
            tensor<fp16, []> var_1485_to_fp16 = const()[name = tensor<string, []>("op_1485_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_13_cast_fp16 = mul(x = var_1484_cast_fp16, y = var_1485_to_fp16)[name = tensor<string, []>("xscores_13_cast_fp16")];
            tensor<fp16, []> var_1503_to_fp16 = const()[name = tensor<string, []>("op_1503_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_15_cast_fp16 = select(a = var_1503_to_fp16, b = xscores_13_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_15_cast_fp16")];
            tensor<int32, []> var_1505 = const()[name = tensor<string, []>("op_1505"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_7_cast_fp16 = softmax(axis = var_1505, x = xscores_15_cast_fp16)[name = tensor<string, []>("xprobs_7_cast_fp16")];
            tensor<bool, []> var_1508_transpose_x_0 = const()[name = tensor<string, []>("op_1508_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1508_transpose_y_0 = const()[name = tensor<string, []>("op_1508_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_3_to_fp16 = cast(dtype = xa_v_3_to_fp16_dtype_0, x = xa_v_3)[name = tensor<string, []>("cast_59")];
            tensor<fp16, [1, 1, 256, 128]> xvT_7_cast_fp16 = transpose(perm = var_1482, x = xa_v_3_to_fp16)[name = tensor<string, []>("transpose_227")];
            tensor<fp16, [1, 1, 1, 128]> var_1508_cast_fp16 = matmul(transpose_x = var_1508_transpose_x_0, transpose_y = var_1508_transpose_y_0, x = xprobs_7_cast_fp16, y = xvT_7_cast_fp16)[name = tensor<string, []>("op_1508_cast_fp16")];
            tensor<int32, [4]> var_1513 = const()[name = tensor<string, []>("op_1513"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1518 = const()[name = tensor<string, []>("op_1518"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_1514_cast_fp16 = transpose(perm = var_1513, x = var_1508_cast_fp16)[name = tensor<string, []>("transpose_226")];
            tensor<fp16, [1, 1, 128]> input_55_cast_fp16 = reshape(shape = var_1518, x = var_1514_cast_fp16)[name = tensor<string, []>("input_55_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25948288))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26046656))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_15_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized, x = input_55_cast_fp16)[name = tensor<string, []>("linear_15_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_57_cast_fp16 = add(x = input_51_cast_fp16, y = linear_15_cast_fp16)[name = tensor<string, []>("input_57_cast_fp16")];
            tensor<int32, [1]> x_25_axes_0 = const()[name = tensor<string, []>("x_25_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_3_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_3_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26048256)))];
            tensor<fp16, []> var_1526_to_fp16 = const()[name = tensor<string, []>("op_1526_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_25_cast_fp16 = layer_norm(axes = x_25_axes_0, epsilon = var_1526_to_fp16, gamma = dec_layers_3_norm_pos_ff_weight_to_fp16, x = input_57_cast_fp16)[name = tensor<string, []>("x_25_cast_fp16")];
            tensor<int32, [3]> var_1542 = const()[name = tensor<string, []>("op_1542"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_13_pad_type_0 = const()[name = tensor<string, []>("y_13_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_13_strides_0 = const()[name = tensor<string, []>("y_13_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_13_pad_0 = const()[name = tensor<string, []>("y_13_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_13_dilations_0 = const()[name = tensor<string, []>("y_13_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_13_groups_0 = const()[name = tensor<string, []>("y_13_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_3_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_3_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26049856))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28409216))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_27_cast_fp16 = transpose(perm = var_1542, x = x_25_cast_fp16)[name = tensor<string, []>("transpose_225")];
            tensor<fp16, [1, 3072, 1]> y_13_cast_fp16 = conv(dilations = y_13_dilations_0, groups = y_13_groups_0, pad = y_13_pad_0, pad_type = y_13_pad_type_0, strides = y_13_strides_0, weight = dec_layers_3_pos_ff_proj_weight_to_fp16_quantized, x = x_27_cast_fp16)[name = tensor<string, []>("y_13_cast_fp16")];
            tensor<string, []> x_29_mode_0 = const()[name = tensor<string, []>("x_29_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_29_cast_fp16 = gelu(mode = x_29_mode_0, x = y_13_cast_fp16)[name = tensor<string, []>("x_29_cast_fp16")];
            tensor<string, []> y_15_pad_type_0 = const()[name = tensor<string, []>("y_15_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_15_strides_0 = const()[name = tensor<string, []>("y_15_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_15_pad_0 = const()[name = tensor<string, []>("y_15_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_15_dilations_0 = const()[name = tensor<string, []>("y_15_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_15_groups_0 = const()[name = tensor<string, []>("y_15_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28415424))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30774784))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_15_cast_fp16 = conv(dilations = y_15_dilations_0, groups = y_15_groups_0, pad = y_15_pad_0, pad_type = y_15_pad_type_0, strides = y_15_strides_0, weight = dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized, x = x_29_cast_fp16)[name = tensor<string, []>("y_15_cast_fp16")];
            tensor<int32, [3]> var_1560 = const()[name = tensor<string, []>("op_1560"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_1561_cast_fp16 = transpose(perm = var_1560, x = y_15_cast_fp16)[name = tensor<string, []>("transpose_224")];
            tensor<fp16, [1, 1, 768]> input_59_cast_fp16 = add(x = input_57_cast_fp16, y = var_1561_cast_fp16)[name = tensor<string, []>("input_59_cast_fp16")];
            tensor<int32, [1]> input_61_axes_0 = const()[name = tensor<string, []>("input_61_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_4_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_4_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30776384)))];
            tensor<fp16, []> var_1565_to_fp16 = const()[name = tensor<string, []>("op_1565_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_61_cast_fp16 = layer_norm(axes = input_61_axes_0, epsilon = var_1565_to_fp16, gamma = dec_layers_4_norm_self_weight_to_fp16, x = input_59_cast_fp16)[name = tensor<string, []>("input_61_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30777984))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32547520))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_16_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized, x = input_61_cast_fp16)[name = tensor<string, []>("linear_16_cast_fp16")];
            tensor<int32, [5]> var_1579 = const()[name = tensor<string, []>("op_1579"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_19_cast_fp16 = reshape(shape = var_1579, x = linear_16_cast_fp16)[name = tensor<string, []>("qkv_19_cast_fp16")];
            tensor<int32, [5]> q_9_begin_0 = const()[name = tensor<string, []>("q_9_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_9_end_0 = const()[name = tensor<string, []>("q_9_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_9_end_mask_0 = const()[name = tensor<string, []>("q_9_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_9_squeeze_mask_0 = const()[name = tensor<string, []>("q_9_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_9_cast_fp16 = slice_by_index(begin = q_9_begin_0, end = q_9_end_0, end_mask = q_9_end_mask_0, squeeze_mask = q_9_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor<string, []>("q_9_cast_fp16")];
            tensor<int32, [5]> new_k_9_begin_0 = const()[name = tensor<string, []>("new_k_9_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_9_end_0 = const()[name = tensor<string, []>("new_k_9_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_9_end_mask_0 = const()[name = tensor<string, []>("new_k_9_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_9_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_9_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_9_cast_fp16 = slice_by_index(begin = new_k_9_begin_0, end = new_k_9_end_0, end_mask = new_k_9_end_mask_0, squeeze_mask = new_k_9_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor<string, []>("new_k_9_cast_fp16")];
            tensor<int32, [5]> new_v_9_begin_0 = const()[name = tensor<string, []>("new_v_9_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_9_end_0 = const()[name = tensor<string, []>("new_v_9_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_9_end_mask_0 = const()[name = tensor<string, []>("new_v_9_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_9_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_9_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_9_cast_fp16 = slice_by_index(begin = new_v_9_begin_0, end = new_v_9_end_0, end_mask = new_v_9_end_mask_0, squeeze_mask = new_v_9_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor<string, []>("new_v_9_cast_fp16")];
            tensor<string, []> sa_k_in_4_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_4_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_4_to_fp16 = cast(dtype = sa_k_in_4_to_fp16_dtype_0, x = sa_k_in_4)[name = tensor<string, []>("cast_58")];
            tensor<fp16, [1, 600, 12, 64]> var_1640_cast_fp16 = mul(x = sa_k_in_4_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_1640_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_1641_cast_fp16 = mul(x = new_k_9_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_1641_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_9_cast_fp16 = add(x = var_1640_cast_fp16, y = var_1641_cast_fp16)[name = tensor<string, []>("sa_k_out_9_cast_fp16")];
            tensor<string, []> sa_k_out_9_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_9_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_4_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_4_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_4_to_fp16 = cast(dtype = sa_v_in_4_to_fp16_dtype_0, x = sa_v_in_4)[name = tensor<string, []>("cast_57")];
            tensor<fp16, [1, 600, 12, 64]> var_1647_cast_fp16 = mul(x = sa_v_in_4_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_1647_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_1648_cast_fp16 = mul(x = new_v_9_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_1648_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_9_cast_fp16 = add(x = var_1647_cast_fp16, y = var_1648_cast_fp16)[name = tensor<string, []>("sa_v_out_9_cast_fp16")];
            tensor<string, []> sa_v_out_9_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_9_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_1667 = const()[name = tensor<string, []>("op_1667"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_1669_transpose_x_0 = const()[name = tensor<string, []>("op_1669_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1669_transpose_y_0 = const()[name = tensor<string, []>("op_1669_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_112_perm_0 = const()[name = tensor<string, []>("transpose_112_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_113_perm_0 = const()[name = tensor<string, []>("transpose_113_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_113 = transpose(perm = transpose_113_perm_0, x = sa_k_out_9_cast_fp16)[name = tensor<string, []>("transpose_222")];
            tensor<fp16, [1, 12, 1, 64]> transpose_112 = transpose(perm = transpose_112_perm_0, x = q_9_cast_fp16)[name = tensor<string, []>("transpose_223")];
            tensor<fp16, [1, 12, 1, 600]> var_1669_cast_fp16 = matmul(transpose_x = var_1669_transpose_x_0, transpose_y = var_1669_transpose_y_0, x = transpose_112, y = transpose_113)[name = tensor<string, []>("op_1669_cast_fp16")];
            tensor<fp16, []> var_1670_to_fp16 = const()[name = tensor<string, []>("op_1670_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_17_cast_fp16 = mul(x = var_1669_cast_fp16, y = var_1670_to_fp16)[name = tensor<string, []>("scores_17_cast_fp16")];
            tensor<fp16, []> var_1688_to_fp16 = const()[name = tensor<string, []>("op_1688_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_19_cast_fp16 = select(a = var_1688_to_fp16, b = scores_17_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_19_cast_fp16")];
            tensor<int32, []> var_1690 = const()[name = tensor<string, []>("op_1690"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_9_cast_fp16 = softmax(axis = var_1690, x = scores_19_cast_fp16)[name = tensor<string, []>("probs_9_cast_fp16")];
            tensor<bool, []> var_1693_transpose_x_0 = const()[name = tensor<string, []>("op_1693_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1693_transpose_y_0 = const()[name = tensor<string, []>("op_1693_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_9_cast_fp16 = transpose(perm = var_1667, x = sa_v_out_9_cast_fp16)[name = tensor<string, []>("transpose_221")];
            tensor<fp16, [1, 12, 1, 64]> var_1693_cast_fp16 = matmul(transpose_x = var_1693_transpose_x_0, transpose_y = var_1693_transpose_y_0, x = probs_9_cast_fp16, y = v_t_9_cast_fp16)[name = tensor<string, []>("op_1693_cast_fp16")];
            tensor<int32, [4]> var_1698 = const()[name = tensor<string, []>("op_1698"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1703 = const()[name = tensor<string, []>("op_1703"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_1699_cast_fp16 = transpose(perm = var_1698, x = var_1693_cast_fp16)[name = tensor<string, []>("transpose_220")];
            tensor<fp16, [1, 1, 768]> input_63_cast_fp16 = reshape(shape = var_1703, x = var_1699_cast_fp16)[name = tensor<string, []>("input_63_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_4_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_4_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32552192))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33142080))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_4_self_attention_o_net_weight_to_fp16_quantized, x = input_63_cast_fp16)[name = tensor<string, []>("linear_17_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_65_cast_fp16 = add(x = input_59_cast_fp16, y = linear_17_cast_fp16)[name = tensor<string, []>("input_65_cast_fp16")];
            tensor<int32, [1]> input_67_axes_0 = const()[name = tensor<string, []>("input_67_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_4_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_4_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33143680)))];
            tensor<fp16, []> var_1711_to_fp16 = const()[name = tensor<string, []>("op_1711_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_67_cast_fp16 = layer_norm(axes = input_67_axes_0, epsilon = var_1711_to_fp16, gamma = dec_layers_4_norm_xattn_query_weight_to_fp16, x = input_65_cast_fp16)[name = tensor<string, []>("input_67_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33145280))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33243648))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_18_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized, x = input_67_cast_fp16)[name = tensor<string, []>("linear_18_cast_fp16")];
            tensor<int32, [4]> var_1724 = const()[name = tensor<string, []>("op_1724"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_9_cast_fp16 = reshape(shape = var_1724, x = linear_18_cast_fp16)[name = tensor<string, []>("xq_proj_9_cast_fp16")];
            tensor<int32, [4]> var_1742 = const()[name = tensor<string, []>("op_1742"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_4_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_4_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_1744_transpose_x_0 = const()[name = tensor<string, []>("op_1744_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1744_transpose_y_0 = const()[name = tensor<string, []>("op_1744_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_4_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_4_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_114_perm_0 = const()[name = tensor<string, []>("transpose_114_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_115_perm_0 = const()[name = tensor<string, []>("transpose_115_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_4_to_fp16 = cast(dtype = xa_k_4_to_fp16_dtype_0, x = xa_k_4)[name = tensor<string, []>("cast_56")];
            tensor<fp16, [1, 1, 128, 256]> transpose_115 = transpose(perm = transpose_115_perm_0, x = xa_k_4_to_fp16)[name = tensor<string, []>("transpose_218")];
            tensor<fp16, [1, 1, 1, 128]> transpose_114 = transpose(perm = transpose_114_perm_0, x = xq_proj_9_cast_fp16)[name = tensor<string, []>("transpose_219")];
            tensor<fp16, [1, 1, 1, 256]> var_1744_cast_fp16 = matmul(transpose_x = var_1744_transpose_x_0, transpose_y = var_1744_transpose_y_0, x = transpose_114, y = transpose_115)[name = tensor<string, []>("op_1744_cast_fp16")];
            tensor<fp16, []> var_1745_to_fp16 = const()[name = tensor<string, []>("op_1745_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_17_cast_fp16 = mul(x = var_1744_cast_fp16, y = var_1745_to_fp16)[name = tensor<string, []>("xscores_17_cast_fp16")];
            tensor<fp16, []> var_1763_to_fp16 = const()[name = tensor<string, []>("op_1763_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_19_cast_fp16 = select(a = var_1763_to_fp16, b = xscores_17_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_19_cast_fp16")];
            tensor<int32, []> var_1765 = const()[name = tensor<string, []>("op_1765"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_9_cast_fp16 = softmax(axis = var_1765, x = xscores_19_cast_fp16)[name = tensor<string, []>("xprobs_9_cast_fp16")];
            tensor<bool, []> var_1768_transpose_x_0 = const()[name = tensor<string, []>("op_1768_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1768_transpose_y_0 = const()[name = tensor<string, []>("op_1768_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_4_to_fp16 = cast(dtype = xa_v_4_to_fp16_dtype_0, x = xa_v_4)[name = tensor<string, []>("cast_55")];
            tensor<fp16, [1, 1, 256, 128]> xvT_9_cast_fp16 = transpose(perm = var_1742, x = xa_v_4_to_fp16)[name = tensor<string, []>("transpose_217")];
            tensor<fp16, [1, 1, 1, 128]> var_1768_cast_fp16 = matmul(transpose_x = var_1768_transpose_x_0, transpose_y = var_1768_transpose_y_0, x = xprobs_9_cast_fp16, y = xvT_9_cast_fp16)[name = tensor<string, []>("op_1768_cast_fp16")];
            tensor<int32, [4]> var_1773 = const()[name = tensor<string, []>("op_1773"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1778 = const()[name = tensor<string, []>("op_1778"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_1774_cast_fp16 = transpose(perm = var_1773, x = var_1768_cast_fp16)[name = tensor<string, []>("transpose_216")];
            tensor<fp16, [1, 1, 128]> input_69_cast_fp16 = reshape(shape = var_1778, x = var_1774_cast_fp16)[name = tensor<string, []>("input_69_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33243968))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33342336))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized, x = input_69_cast_fp16)[name = tensor<string, []>("linear_19_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_71_cast_fp16 = add(x = input_65_cast_fp16, y = linear_19_cast_fp16)[name = tensor<string, []>("input_71_cast_fp16")];
            tensor<int32, [1]> x_33_axes_0 = const()[name = tensor<string, []>("x_33_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_4_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_4_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33343936)))];
            tensor<fp16, []> var_1786_to_fp16 = const()[name = tensor<string, []>("op_1786_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_33_cast_fp16 = layer_norm(axes = x_33_axes_0, epsilon = var_1786_to_fp16, gamma = dec_layers_4_norm_pos_ff_weight_to_fp16, x = input_71_cast_fp16)[name = tensor<string, []>("x_33_cast_fp16")];
            tensor<int32, [3]> var_1802 = const()[name = tensor<string, []>("op_1802"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_17_pad_type_0 = const()[name = tensor<string, []>("y_17_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_17_strides_0 = const()[name = tensor<string, []>("y_17_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_17_pad_0 = const()[name = tensor<string, []>("y_17_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_17_dilations_0 = const()[name = tensor<string, []>("y_17_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_17_groups_0 = const()[name = tensor<string, []>("y_17_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_4_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_4_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33345536))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35704896))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_35_cast_fp16 = transpose(perm = var_1802, x = x_33_cast_fp16)[name = tensor<string, []>("transpose_215")];
            tensor<fp16, [1, 3072, 1]> y_17_cast_fp16 = conv(dilations = y_17_dilations_0, groups = y_17_groups_0, pad = y_17_pad_0, pad_type = y_17_pad_type_0, strides = y_17_strides_0, weight = dec_layers_4_pos_ff_proj_weight_to_fp16_quantized, x = x_35_cast_fp16)[name = tensor<string, []>("y_17_cast_fp16")];
            tensor<string, []> x_37_mode_0 = const()[name = tensor<string, []>("x_37_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = y_17_cast_fp16)[name = tensor<string, []>("x_37_cast_fp16")];
            tensor<string, []> y_19_pad_type_0 = const()[name = tensor<string, []>("y_19_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_19_strides_0 = const()[name = tensor<string, []>("y_19_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_19_pad_0 = const()[name = tensor<string, []>("y_19_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_19_dilations_0 = const()[name = tensor<string, []>("y_19_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_19_groups_0 = const()[name = tensor<string, []>("y_19_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35711104))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38070464))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_19_cast_fp16 = conv(dilations = y_19_dilations_0, groups = y_19_groups_0, pad = y_19_pad_0, pad_type = y_19_pad_type_0, strides = y_19_strides_0, weight = dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized, x = x_37_cast_fp16)[name = tensor<string, []>("y_19_cast_fp16")];
            tensor<int32, [3]> var_1820 = const()[name = tensor<string, []>("op_1820"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_1821_cast_fp16 = transpose(perm = var_1820, x = y_19_cast_fp16)[name = tensor<string, []>("transpose_214")];
            tensor<fp16, [1, 1, 768]> input_73_cast_fp16 = add(x = input_71_cast_fp16, y = var_1821_cast_fp16)[name = tensor<string, []>("input_73_cast_fp16")];
            tensor<int32, [1]> input_75_axes_0 = const()[name = tensor<string, []>("input_75_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_5_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_5_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38072064)))];
            tensor<fp16, []> var_1825_to_fp16 = const()[name = tensor<string, []>("op_1825_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_75_cast_fp16 = layer_norm(axes = input_75_axes_0, epsilon = var_1825_to_fp16, gamma = dec_layers_5_norm_self_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("input_75_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38073664))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39843200))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized, x = input_75_cast_fp16)[name = tensor<string, []>("linear_20_cast_fp16")];
            tensor<int32, [5]> var_1839 = const()[name = tensor<string, []>("op_1839"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_23_cast_fp16 = reshape(shape = var_1839, x = linear_20_cast_fp16)[name = tensor<string, []>("qkv_23_cast_fp16")];
            tensor<int32, [5]> q_11_begin_0 = const()[name = tensor<string, []>("q_11_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_11_end_0 = const()[name = tensor<string, []>("q_11_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_11_end_mask_0 = const()[name = tensor<string, []>("q_11_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_11_squeeze_mask_0 = const()[name = tensor<string, []>("q_11_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_11_cast_fp16 = slice_by_index(begin = q_11_begin_0, end = q_11_end_0, end_mask = q_11_end_mask_0, squeeze_mask = q_11_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor<string, []>("q_11_cast_fp16")];
            tensor<int32, [5]> new_k_11_begin_0 = const()[name = tensor<string, []>("new_k_11_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_11_end_0 = const()[name = tensor<string, []>("new_k_11_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_11_end_mask_0 = const()[name = tensor<string, []>("new_k_11_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_11_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_11_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_11_cast_fp16 = slice_by_index(begin = new_k_11_begin_0, end = new_k_11_end_0, end_mask = new_k_11_end_mask_0, squeeze_mask = new_k_11_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor<string, []>("new_k_11_cast_fp16")];
            tensor<int32, [5]> new_v_11_begin_0 = const()[name = tensor<string, []>("new_v_11_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_11_end_0 = const()[name = tensor<string, []>("new_v_11_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_11_end_mask_0 = const()[name = tensor<string, []>("new_v_11_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_11_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_11_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_11_cast_fp16 = slice_by_index(begin = new_v_11_begin_0, end = new_v_11_end_0, end_mask = new_v_11_end_mask_0, squeeze_mask = new_v_11_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor<string, []>("new_v_11_cast_fp16")];
            tensor<string, []> sa_k_in_5_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_5_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_5_to_fp16 = cast(dtype = sa_k_in_5_to_fp16_dtype_0, x = sa_k_in_5)[name = tensor<string, []>("cast_54")];
            tensor<fp16, [1, 600, 12, 64]> var_1900_cast_fp16 = mul(x = sa_k_in_5_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_1900_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_1901_cast_fp16 = mul(x = new_k_11_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_1901_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_11_cast_fp16 = add(x = var_1900_cast_fp16, y = var_1901_cast_fp16)[name = tensor<string, []>("sa_k_out_11_cast_fp16")];
            tensor<string, []> sa_k_out_11_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_11_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_5_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_5_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_5_to_fp16 = cast(dtype = sa_v_in_5_to_fp16_dtype_0, x = sa_v_in_5)[name = tensor<string, []>("cast_53")];
            tensor<fp16, [1, 600, 12, 64]> var_1907_cast_fp16 = mul(x = sa_v_in_5_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_1907_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_1908_cast_fp16 = mul(x = new_v_11_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_1908_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_11_cast_fp16 = add(x = var_1907_cast_fp16, y = var_1908_cast_fp16)[name = tensor<string, []>("sa_v_out_11_cast_fp16")];
            tensor<string, []> sa_v_out_11_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_11_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_1927 = const()[name = tensor<string, []>("op_1927"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_1929_transpose_x_0 = const()[name = tensor<string, []>("op_1929_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1929_transpose_y_0 = const()[name = tensor<string, []>("op_1929_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_116_perm_0 = const()[name = tensor<string, []>("transpose_116_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_117_perm_0 = const()[name = tensor<string, []>("transpose_117_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_117 = transpose(perm = transpose_117_perm_0, x = sa_k_out_11_cast_fp16)[name = tensor<string, []>("transpose_212")];
            tensor<fp16, [1, 12, 1, 64]> transpose_116 = transpose(perm = transpose_116_perm_0, x = q_11_cast_fp16)[name = tensor<string, []>("transpose_213")];
            tensor<fp16, [1, 12, 1, 600]> var_1929_cast_fp16 = matmul(transpose_x = var_1929_transpose_x_0, transpose_y = var_1929_transpose_y_0, x = transpose_116, y = transpose_117)[name = tensor<string, []>("op_1929_cast_fp16")];
            tensor<fp16, []> var_1930_to_fp16 = const()[name = tensor<string, []>("op_1930_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_21_cast_fp16 = mul(x = var_1929_cast_fp16, y = var_1930_to_fp16)[name = tensor<string, []>("scores_21_cast_fp16")];
            tensor<fp16, []> var_1948_to_fp16 = const()[name = tensor<string, []>("op_1948_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_23_cast_fp16 = select(a = var_1948_to_fp16, b = scores_21_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_23_cast_fp16")];
            tensor<int32, []> var_1950 = const()[name = tensor<string, []>("op_1950"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_11_cast_fp16 = softmax(axis = var_1950, x = scores_23_cast_fp16)[name = tensor<string, []>("probs_11_cast_fp16")];
            tensor<bool, []> var_1953_transpose_x_0 = const()[name = tensor<string, []>("op_1953_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1953_transpose_y_0 = const()[name = tensor<string, []>("op_1953_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_11_cast_fp16 = transpose(perm = var_1927, x = sa_v_out_11_cast_fp16)[name = tensor<string, []>("transpose_211")];
            tensor<fp16, [1, 12, 1, 64]> var_1953_cast_fp16 = matmul(transpose_x = var_1953_transpose_x_0, transpose_y = var_1953_transpose_y_0, x = probs_11_cast_fp16, y = v_t_11_cast_fp16)[name = tensor<string, []>("op_1953_cast_fp16")];
            tensor<int32, [4]> var_1958 = const()[name = tensor<string, []>("op_1958"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1963 = const()[name = tensor<string, []>("op_1963"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_1959_cast_fp16 = transpose(perm = var_1958, x = var_1953_cast_fp16)[name = tensor<string, []>("transpose_210")];
            tensor<fp16, [1, 1, 768]> input_77_cast_fp16 = reshape(shape = var_1963, x = var_1959_cast_fp16)[name = tensor<string, []>("input_77_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_5_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_5_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39847872))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40437760))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_21_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_5_self_attention_o_net_weight_to_fp16_quantized, x = input_77_cast_fp16)[name = tensor<string, []>("linear_21_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_79_cast_fp16 = add(x = input_73_cast_fp16, y = linear_21_cast_fp16)[name = tensor<string, []>("input_79_cast_fp16")];
            tensor<int32, [1]> input_81_axes_0 = const()[name = tensor<string, []>("input_81_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_5_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_5_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40439360)))];
            tensor<fp16, []> var_1971_to_fp16 = const()[name = tensor<string, []>("op_1971_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_81_cast_fp16 = layer_norm(axes = input_81_axes_0, epsilon = var_1971_to_fp16, gamma = dec_layers_5_norm_xattn_query_weight_to_fp16, x = input_79_cast_fp16)[name = tensor<string, []>("input_81_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40440960))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40539328))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_22_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized, x = input_81_cast_fp16)[name = tensor<string, []>("linear_22_cast_fp16")];
            tensor<int32, [4]> var_1984 = const()[name = tensor<string, []>("op_1984"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_11_cast_fp16 = reshape(shape = var_1984, x = linear_22_cast_fp16)[name = tensor<string, []>("xq_proj_11_cast_fp16")];
            tensor<int32, [4]> var_2002 = const()[name = tensor<string, []>("op_2002"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_5_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_5_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_2004_transpose_x_0 = const()[name = tensor<string, []>("op_2004_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2004_transpose_y_0 = const()[name = tensor<string, []>("op_2004_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_5_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_5_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_118_perm_0 = const()[name = tensor<string, []>("transpose_118_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_119_perm_0 = const()[name = tensor<string, []>("transpose_119_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_5_to_fp16 = cast(dtype = xa_k_5_to_fp16_dtype_0, x = xa_k_5)[name = tensor<string, []>("cast_52")];
            tensor<fp16, [1, 1, 128, 256]> transpose_119 = transpose(perm = transpose_119_perm_0, x = xa_k_5_to_fp16)[name = tensor<string, []>("transpose_208")];
            tensor<fp16, [1, 1, 1, 128]> transpose_118 = transpose(perm = transpose_118_perm_0, x = xq_proj_11_cast_fp16)[name = tensor<string, []>("transpose_209")];
            tensor<fp16, [1, 1, 1, 256]> var_2004_cast_fp16 = matmul(transpose_x = var_2004_transpose_x_0, transpose_y = var_2004_transpose_y_0, x = transpose_118, y = transpose_119)[name = tensor<string, []>("op_2004_cast_fp16")];
            tensor<fp16, []> var_2005_to_fp16 = const()[name = tensor<string, []>("op_2005_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_21_cast_fp16 = mul(x = var_2004_cast_fp16, y = var_2005_to_fp16)[name = tensor<string, []>("xscores_21_cast_fp16")];
            tensor<fp16, []> var_2023_to_fp16 = const()[name = tensor<string, []>("op_2023_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_23_cast_fp16 = select(a = var_2023_to_fp16, b = xscores_21_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_23_cast_fp16")];
            tensor<int32, []> var_2025 = const()[name = tensor<string, []>("op_2025"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_11_cast_fp16 = softmax(axis = var_2025, x = xscores_23_cast_fp16)[name = tensor<string, []>("xprobs_11_cast_fp16")];
            tensor<bool, []> var_2028_transpose_x_0 = const()[name = tensor<string, []>("op_2028_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2028_transpose_y_0 = const()[name = tensor<string, []>("op_2028_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_5_to_fp16 = cast(dtype = xa_v_5_to_fp16_dtype_0, x = xa_v_5)[name = tensor<string, []>("cast_51")];
            tensor<fp16, [1, 1, 256, 128]> xvT_11_cast_fp16 = transpose(perm = var_2002, x = xa_v_5_to_fp16)[name = tensor<string, []>("transpose_207")];
            tensor<fp16, [1, 1, 1, 128]> var_2028_cast_fp16 = matmul(transpose_x = var_2028_transpose_x_0, transpose_y = var_2028_transpose_y_0, x = xprobs_11_cast_fp16, y = xvT_11_cast_fp16)[name = tensor<string, []>("op_2028_cast_fp16")];
            tensor<int32, [4]> var_2033 = const()[name = tensor<string, []>("op_2033"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_2038 = const()[name = tensor<string, []>("op_2038"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_2034_cast_fp16 = transpose(perm = var_2033, x = var_2028_cast_fp16)[name = tensor<string, []>("transpose_206")];
            tensor<fp16, [1, 1, 128]> input_83_cast_fp16 = reshape(shape = var_2038, x = var_2034_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40539648))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40638016))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_23_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized, x = input_83_cast_fp16)[name = tensor<string, []>("linear_23_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_85_cast_fp16 = add(x = input_79_cast_fp16, y = linear_23_cast_fp16)[name = tensor<string, []>("input_85_cast_fp16")];
            tensor<int32, [1]> x_41_axes_0 = const()[name = tensor<string, []>("x_41_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_5_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_5_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40639616)))];
            tensor<fp16, []> var_2046_to_fp16 = const()[name = tensor<string, []>("op_2046_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_41_cast_fp16 = layer_norm(axes = x_41_axes_0, epsilon = var_2046_to_fp16, gamma = dec_layers_5_norm_pos_ff_weight_to_fp16, x = input_85_cast_fp16)[name = tensor<string, []>("x_41_cast_fp16")];
            tensor<int32, [3]> var_2062 = const()[name = tensor<string, []>("op_2062"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_21_pad_type_0 = const()[name = tensor<string, []>("y_21_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_21_strides_0 = const()[name = tensor<string, []>("y_21_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_21_pad_0 = const()[name = tensor<string, []>("y_21_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_21_dilations_0 = const()[name = tensor<string, []>("y_21_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_21_groups_0 = const()[name = tensor<string, []>("y_21_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_5_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_5_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40641216))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43000576))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_43_cast_fp16 = transpose(perm = var_2062, x = x_41_cast_fp16)[name = tensor<string, []>("transpose_205")];
            tensor<fp16, [1, 3072, 1]> y_21_cast_fp16 = conv(dilations = y_21_dilations_0, groups = y_21_groups_0, pad = y_21_pad_0, pad_type = y_21_pad_type_0, strides = y_21_strides_0, weight = dec_layers_5_pos_ff_proj_weight_to_fp16_quantized, x = x_43_cast_fp16)[name = tensor<string, []>("y_21_cast_fp16")];
            tensor<string, []> x_45_mode_0 = const()[name = tensor<string, []>("x_45_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_45_cast_fp16 = gelu(mode = x_45_mode_0, x = y_21_cast_fp16)[name = tensor<string, []>("x_45_cast_fp16")];
            tensor<string, []> y_23_pad_type_0 = const()[name = tensor<string, []>("y_23_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_23_strides_0 = const()[name = tensor<string, []>("y_23_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_23_pad_0 = const()[name = tensor<string, []>("y_23_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_23_dilations_0 = const()[name = tensor<string, []>("y_23_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_23_groups_0 = const()[name = tensor<string, []>("y_23_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43006784))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45366144))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_23_cast_fp16 = conv(dilations = y_23_dilations_0, groups = y_23_groups_0, pad = y_23_pad_0, pad_type = y_23_pad_type_0, strides = y_23_strides_0, weight = dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized, x = x_45_cast_fp16)[name = tensor<string, []>("y_23_cast_fp16")];
            tensor<int32, [3]> var_2080 = const()[name = tensor<string, []>("op_2080"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_2081_cast_fp16 = transpose(perm = var_2080, x = y_23_cast_fp16)[name = tensor<string, []>("transpose_204")];
            tensor<fp16, [1, 1, 768]> input_87_cast_fp16 = add(x = input_85_cast_fp16, y = var_2081_cast_fp16)[name = tensor<string, []>("input_87_cast_fp16")];
            tensor<int32, [1]> input_89_axes_0 = const()[name = tensor<string, []>("input_89_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_6_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_6_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45367744)))];
            tensor<fp16, []> var_2085_to_fp16 = const()[name = tensor<string, []>("op_2085_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_89_cast_fp16 = layer_norm(axes = input_89_axes_0, epsilon = var_2085_to_fp16, gamma = dec_layers_6_norm_self_weight_to_fp16, x = input_87_cast_fp16)[name = tensor<string, []>("input_89_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45369344))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47138880))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_24_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized, x = input_89_cast_fp16)[name = tensor<string, []>("linear_24_cast_fp16")];
            tensor<int32, [5]> var_2099 = const()[name = tensor<string, []>("op_2099"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_27_cast_fp16 = reshape(shape = var_2099, x = linear_24_cast_fp16)[name = tensor<string, []>("qkv_27_cast_fp16")];
            tensor<int32, [5]> q_13_begin_0 = const()[name = tensor<string, []>("q_13_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_13_end_0 = const()[name = tensor<string, []>("q_13_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_13_end_mask_0 = const()[name = tensor<string, []>("q_13_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_13_squeeze_mask_0 = const()[name = tensor<string, []>("q_13_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_13_cast_fp16 = slice_by_index(begin = q_13_begin_0, end = q_13_end_0, end_mask = q_13_end_mask_0, squeeze_mask = q_13_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor<string, []>("q_13_cast_fp16")];
            tensor<int32, [5]> new_k_13_begin_0 = const()[name = tensor<string, []>("new_k_13_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_13_end_0 = const()[name = tensor<string, []>("new_k_13_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_13_end_mask_0 = const()[name = tensor<string, []>("new_k_13_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_13_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_13_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_13_cast_fp16 = slice_by_index(begin = new_k_13_begin_0, end = new_k_13_end_0, end_mask = new_k_13_end_mask_0, squeeze_mask = new_k_13_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor<string, []>("new_k_13_cast_fp16")];
            tensor<int32, [5]> new_v_13_begin_0 = const()[name = tensor<string, []>("new_v_13_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_13_end_0 = const()[name = tensor<string, []>("new_v_13_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_13_end_mask_0 = const()[name = tensor<string, []>("new_v_13_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_13_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_13_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_13_cast_fp16 = slice_by_index(begin = new_v_13_begin_0, end = new_v_13_end_0, end_mask = new_v_13_end_mask_0, squeeze_mask = new_v_13_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor<string, []>("new_v_13_cast_fp16")];
            tensor<string, []> sa_k_in_6_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_6_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_6_to_fp16 = cast(dtype = sa_k_in_6_to_fp16_dtype_0, x = sa_k_in_6)[name = tensor<string, []>("cast_50")];
            tensor<fp16, [1, 600, 12, 64]> var_2160_cast_fp16 = mul(x = sa_k_in_6_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_2160_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_2161_cast_fp16 = mul(x = new_k_13_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_2161_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_13_cast_fp16 = add(x = var_2160_cast_fp16, y = var_2161_cast_fp16)[name = tensor<string, []>("sa_k_out_13_cast_fp16")];
            tensor<string, []> sa_k_out_13_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_13_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_6_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_6_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_6_to_fp16 = cast(dtype = sa_v_in_6_to_fp16_dtype_0, x = sa_v_in_6)[name = tensor<string, []>("cast_49")];
            tensor<fp16, [1, 600, 12, 64]> var_2167_cast_fp16 = mul(x = sa_v_in_6_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_2167_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_2168_cast_fp16 = mul(x = new_v_13_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_2168_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_13_cast_fp16 = add(x = var_2167_cast_fp16, y = var_2168_cast_fp16)[name = tensor<string, []>("sa_v_out_13_cast_fp16")];
            tensor<string, []> sa_v_out_13_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_13_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_2187 = const()[name = tensor<string, []>("op_2187"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_2189_transpose_x_0 = const()[name = tensor<string, []>("op_2189_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2189_transpose_y_0 = const()[name = tensor<string, []>("op_2189_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_120_perm_0 = const()[name = tensor<string, []>("transpose_120_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_121_perm_0 = const()[name = tensor<string, []>("transpose_121_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_121 = transpose(perm = transpose_121_perm_0, x = sa_k_out_13_cast_fp16)[name = tensor<string, []>("transpose_202")];
            tensor<fp16, [1, 12, 1, 64]> transpose_120 = transpose(perm = transpose_120_perm_0, x = q_13_cast_fp16)[name = tensor<string, []>("transpose_203")];
            tensor<fp16, [1, 12, 1, 600]> var_2189_cast_fp16 = matmul(transpose_x = var_2189_transpose_x_0, transpose_y = var_2189_transpose_y_0, x = transpose_120, y = transpose_121)[name = tensor<string, []>("op_2189_cast_fp16")];
            tensor<fp16, []> var_2190_to_fp16 = const()[name = tensor<string, []>("op_2190_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_25_cast_fp16 = mul(x = var_2189_cast_fp16, y = var_2190_to_fp16)[name = tensor<string, []>("scores_25_cast_fp16")];
            tensor<fp16, []> var_2208_to_fp16 = const()[name = tensor<string, []>("op_2208_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_27_cast_fp16 = select(a = var_2208_to_fp16, b = scores_25_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_27_cast_fp16")];
            tensor<int32, []> var_2210 = const()[name = tensor<string, []>("op_2210"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_13_cast_fp16 = softmax(axis = var_2210, x = scores_27_cast_fp16)[name = tensor<string, []>("probs_13_cast_fp16")];
            tensor<bool, []> var_2213_transpose_x_0 = const()[name = tensor<string, []>("op_2213_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2213_transpose_y_0 = const()[name = tensor<string, []>("op_2213_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_13_cast_fp16 = transpose(perm = var_2187, x = sa_v_out_13_cast_fp16)[name = tensor<string, []>("transpose_201")];
            tensor<fp16, [1, 12, 1, 64]> var_2213_cast_fp16 = matmul(transpose_x = var_2213_transpose_x_0, transpose_y = var_2213_transpose_y_0, x = probs_13_cast_fp16, y = v_t_13_cast_fp16)[name = tensor<string, []>("op_2213_cast_fp16")];
            tensor<int32, [4]> var_2218 = const()[name = tensor<string, []>("op_2218"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_2223 = const()[name = tensor<string, []>("op_2223"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_2219_cast_fp16 = transpose(perm = var_2218, x = var_2213_cast_fp16)[name = tensor<string, []>("transpose_200")];
            tensor<fp16, [1, 1, 768]> input_91_cast_fp16 = reshape(shape = var_2223, x = var_2219_cast_fp16)[name = tensor<string, []>("input_91_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_6_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_6_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47143552))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47733440))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_6_self_attention_o_net_weight_to_fp16_quantized, x = input_91_cast_fp16)[name = tensor<string, []>("linear_25_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_93_cast_fp16 = add(x = input_87_cast_fp16, y = linear_25_cast_fp16)[name = tensor<string, []>("input_93_cast_fp16")];
            tensor<int32, [1]> input_95_axes_0 = const()[name = tensor<string, []>("input_95_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_6_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_6_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47735040)))];
            tensor<fp16, []> var_2231_to_fp16 = const()[name = tensor<string, []>("op_2231_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_95_cast_fp16 = layer_norm(axes = input_95_axes_0, epsilon = var_2231_to_fp16, gamma = dec_layers_6_norm_xattn_query_weight_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("input_95_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47736640))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47835008))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_26_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized, x = input_95_cast_fp16)[name = tensor<string, []>("linear_26_cast_fp16")];
            tensor<int32, [4]> var_2244 = const()[name = tensor<string, []>("op_2244"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_13_cast_fp16 = reshape(shape = var_2244, x = linear_26_cast_fp16)[name = tensor<string, []>("xq_proj_13_cast_fp16")];
            tensor<int32, [4]> var_2262 = const()[name = tensor<string, []>("op_2262"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_6_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_6_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_2264_transpose_x_0 = const()[name = tensor<string, []>("op_2264_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2264_transpose_y_0 = const()[name = tensor<string, []>("op_2264_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_6_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_6_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_122_perm_0 = const()[name = tensor<string, []>("transpose_122_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_123_perm_0 = const()[name = tensor<string, []>("transpose_123_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_6_to_fp16 = cast(dtype = xa_k_6_to_fp16_dtype_0, x = xa_k_6)[name = tensor<string, []>("cast_48")];
            tensor<fp16, [1, 1, 128, 256]> transpose_123 = transpose(perm = transpose_123_perm_0, x = xa_k_6_to_fp16)[name = tensor<string, []>("transpose_198")];
            tensor<fp16, [1, 1, 1, 128]> transpose_122 = transpose(perm = transpose_122_perm_0, x = xq_proj_13_cast_fp16)[name = tensor<string, []>("transpose_199")];
            tensor<fp16, [1, 1, 1, 256]> var_2264_cast_fp16 = matmul(transpose_x = var_2264_transpose_x_0, transpose_y = var_2264_transpose_y_0, x = transpose_122, y = transpose_123)[name = tensor<string, []>("op_2264_cast_fp16")];
            tensor<fp16, []> var_2265_to_fp16 = const()[name = tensor<string, []>("op_2265_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_25_cast_fp16 = mul(x = var_2264_cast_fp16, y = var_2265_to_fp16)[name = tensor<string, []>("xscores_25_cast_fp16")];
            tensor<fp16, []> var_2283_to_fp16 = const()[name = tensor<string, []>("op_2283_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_27_cast_fp16 = select(a = var_2283_to_fp16, b = xscores_25_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_27_cast_fp16")];
            tensor<int32, []> var_2285 = const()[name = tensor<string, []>("op_2285"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_13_cast_fp16 = softmax(axis = var_2285, x = xscores_27_cast_fp16)[name = tensor<string, []>("xprobs_13_cast_fp16")];
            tensor<bool, []> var_2288_transpose_x_0 = const()[name = tensor<string, []>("op_2288_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2288_transpose_y_0 = const()[name = tensor<string, []>("op_2288_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_6_to_fp16 = cast(dtype = xa_v_6_to_fp16_dtype_0, x = xa_v_6)[name = tensor<string, []>("cast_47")];
            tensor<fp16, [1, 1, 256, 128]> xvT_13_cast_fp16 = transpose(perm = var_2262, x = xa_v_6_to_fp16)[name = tensor<string, []>("transpose_197")];
            tensor<fp16, [1, 1, 1, 128]> var_2288_cast_fp16 = matmul(transpose_x = var_2288_transpose_x_0, transpose_y = var_2288_transpose_y_0, x = xprobs_13_cast_fp16, y = xvT_13_cast_fp16)[name = tensor<string, []>("op_2288_cast_fp16")];
            tensor<int32, [4]> var_2293 = const()[name = tensor<string, []>("op_2293"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_2298 = const()[name = tensor<string, []>("op_2298"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_2294_cast_fp16 = transpose(perm = var_2293, x = var_2288_cast_fp16)[name = tensor<string, []>("transpose_196")];
            tensor<fp16, [1, 1, 128]> input_97_cast_fp16 = reshape(shape = var_2298, x = var_2294_cast_fp16)[name = tensor<string, []>("input_97_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47835328))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47933696))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_27_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized, x = input_97_cast_fp16)[name = tensor<string, []>("linear_27_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_99_cast_fp16 = add(x = input_93_cast_fp16, y = linear_27_cast_fp16)[name = tensor<string, []>("input_99_cast_fp16")];
            tensor<int32, [1]> x_49_axes_0 = const()[name = tensor<string, []>("x_49_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_6_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_6_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47935296)))];
            tensor<fp16, []> var_2306_to_fp16 = const()[name = tensor<string, []>("op_2306_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_49_cast_fp16 = layer_norm(axes = x_49_axes_0, epsilon = var_2306_to_fp16, gamma = dec_layers_6_norm_pos_ff_weight_to_fp16, x = input_99_cast_fp16)[name = tensor<string, []>("x_49_cast_fp16")];
            tensor<int32, [3]> var_2322 = const()[name = tensor<string, []>("op_2322"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_25_pad_type_0 = const()[name = tensor<string, []>("y_25_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_25_strides_0 = const()[name = tensor<string, []>("y_25_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_25_pad_0 = const()[name = tensor<string, []>("y_25_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_25_dilations_0 = const()[name = tensor<string, []>("y_25_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_25_groups_0 = const()[name = tensor<string, []>("y_25_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_6_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_6_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47936896))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50296256))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_51_cast_fp16 = transpose(perm = var_2322, x = x_49_cast_fp16)[name = tensor<string, []>("transpose_195")];
            tensor<fp16, [1, 3072, 1]> y_25_cast_fp16 = conv(dilations = y_25_dilations_0, groups = y_25_groups_0, pad = y_25_pad_0, pad_type = y_25_pad_type_0, strides = y_25_strides_0, weight = dec_layers_6_pos_ff_proj_weight_to_fp16_quantized, x = x_51_cast_fp16)[name = tensor<string, []>("y_25_cast_fp16")];
            tensor<string, []> x_53_mode_0 = const()[name = tensor<string, []>("x_53_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_53_cast_fp16 = gelu(mode = x_53_mode_0, x = y_25_cast_fp16)[name = tensor<string, []>("x_53_cast_fp16")];
            tensor<string, []> y_27_pad_type_0 = const()[name = tensor<string, []>("y_27_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_27_strides_0 = const()[name = tensor<string, []>("y_27_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_27_pad_0 = const()[name = tensor<string, []>("y_27_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_27_dilations_0 = const()[name = tensor<string, []>("y_27_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_27_groups_0 = const()[name = tensor<string, []>("y_27_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50302464))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52661824))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_27_cast_fp16 = conv(dilations = y_27_dilations_0, groups = y_27_groups_0, pad = y_27_pad_0, pad_type = y_27_pad_type_0, strides = y_27_strides_0, weight = dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized, x = x_53_cast_fp16)[name = tensor<string, []>("y_27_cast_fp16")];
            tensor<int32, [3]> var_2340 = const()[name = tensor<string, []>("op_2340"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_2341_cast_fp16 = transpose(perm = var_2340, x = y_27_cast_fp16)[name = tensor<string, []>("transpose_194")];
            tensor<fp16, [1, 1, 768]> input_101_cast_fp16 = add(x = input_99_cast_fp16, y = var_2341_cast_fp16)[name = tensor<string, []>("input_101_cast_fp16")];
            tensor<int32, [1]> input_103_axes_0 = const()[name = tensor<string, []>("input_103_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_7_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_7_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52663424)))];
            tensor<fp16, []> var_2345_to_fp16 = const()[name = tensor<string, []>("op_2345_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_103_cast_fp16 = layer_norm(axes = input_103_axes_0, epsilon = var_2345_to_fp16, gamma = dec_layers_7_norm_self_weight_to_fp16, x = input_101_cast_fp16)[name = tensor<string, []>("input_103_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52665024))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54434560))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized, x = input_103_cast_fp16)[name = tensor<string, []>("linear_28_cast_fp16")];
            tensor<int32, [5]> var_2359 = const()[name = tensor<string, []>("op_2359"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_31_cast_fp16 = reshape(shape = var_2359, x = linear_28_cast_fp16)[name = tensor<string, []>("qkv_31_cast_fp16")];
            tensor<int32, [5]> q_15_begin_0 = const()[name = tensor<string, []>("q_15_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_15_end_0 = const()[name = tensor<string, []>("q_15_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_15_end_mask_0 = const()[name = tensor<string, []>("q_15_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_15_squeeze_mask_0 = const()[name = tensor<string, []>("q_15_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_15_cast_fp16 = slice_by_index(begin = q_15_begin_0, end = q_15_end_0, end_mask = q_15_end_mask_0, squeeze_mask = q_15_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor<string, []>("q_15_cast_fp16")];
            tensor<int32, [5]> new_k_15_begin_0 = const()[name = tensor<string, []>("new_k_15_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_15_end_0 = const()[name = tensor<string, []>("new_k_15_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_15_end_mask_0 = const()[name = tensor<string, []>("new_k_15_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_15_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_15_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_15_cast_fp16 = slice_by_index(begin = new_k_15_begin_0, end = new_k_15_end_0, end_mask = new_k_15_end_mask_0, squeeze_mask = new_k_15_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor<string, []>("new_k_15_cast_fp16")];
            tensor<int32, [5]> new_v_15_begin_0 = const()[name = tensor<string, []>("new_v_15_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_15_end_0 = const()[name = tensor<string, []>("new_v_15_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_15_end_mask_0 = const()[name = tensor<string, []>("new_v_15_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_15_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_15_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_15_cast_fp16 = slice_by_index(begin = new_v_15_begin_0, end = new_v_15_end_0, end_mask = new_v_15_end_mask_0, squeeze_mask = new_v_15_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor<string, []>("new_v_15_cast_fp16")];
            tensor<string, []> sa_k_in_7_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_7_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_7_to_fp16 = cast(dtype = sa_k_in_7_to_fp16_dtype_0, x = sa_k_in_7)[name = tensor<string, []>("cast_46")];
            tensor<fp16, [1, 600, 12, 64]> var_2420_cast_fp16 = mul(x = sa_k_in_7_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_2420_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_2421_cast_fp16 = mul(x = new_k_15_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_2421_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_15_cast_fp16 = add(x = var_2420_cast_fp16, y = var_2421_cast_fp16)[name = tensor<string, []>("sa_k_out_15_cast_fp16")];
            tensor<string, []> sa_k_out_15_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_15_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_7_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_7_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_7_to_fp16 = cast(dtype = sa_v_in_7_to_fp16_dtype_0, x = sa_v_in_7)[name = tensor<string, []>("cast_45")];
            tensor<fp16, [1, 600, 12, 64]> var_2427_cast_fp16 = mul(x = sa_v_in_7_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_2427_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_2428_cast_fp16 = mul(x = new_v_15_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_2428_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_15_cast_fp16 = add(x = var_2427_cast_fp16, y = var_2428_cast_fp16)[name = tensor<string, []>("sa_v_out_15_cast_fp16")];
            tensor<string, []> sa_v_out_15_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_15_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_2447 = const()[name = tensor<string, []>("op_2447"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_2449_transpose_x_0 = const()[name = tensor<string, []>("op_2449_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2449_transpose_y_0 = const()[name = tensor<string, []>("op_2449_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_124_perm_0 = const()[name = tensor<string, []>("transpose_124_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_125_perm_0 = const()[name = tensor<string, []>("transpose_125_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_125 = transpose(perm = transpose_125_perm_0, x = sa_k_out_15_cast_fp16)[name = tensor<string, []>("transpose_192")];
            tensor<fp16, [1, 12, 1, 64]> transpose_124 = transpose(perm = transpose_124_perm_0, x = q_15_cast_fp16)[name = tensor<string, []>("transpose_193")];
            tensor<fp16, [1, 12, 1, 600]> var_2449_cast_fp16 = matmul(transpose_x = var_2449_transpose_x_0, transpose_y = var_2449_transpose_y_0, x = transpose_124, y = transpose_125)[name = tensor<string, []>("op_2449_cast_fp16")];
            tensor<fp16, []> var_2450_to_fp16 = const()[name = tensor<string, []>("op_2450_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_29_cast_fp16 = mul(x = var_2449_cast_fp16, y = var_2450_to_fp16)[name = tensor<string, []>("scores_29_cast_fp16")];
            tensor<fp16, []> var_2468_to_fp16 = const()[name = tensor<string, []>("op_2468_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_31_cast_fp16 = select(a = var_2468_to_fp16, b = scores_29_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_31_cast_fp16")];
            tensor<int32, []> var_2470 = const()[name = tensor<string, []>("op_2470"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_15_cast_fp16 = softmax(axis = var_2470, x = scores_31_cast_fp16)[name = tensor<string, []>("probs_15_cast_fp16")];
            tensor<bool, []> var_2473_transpose_x_0 = const()[name = tensor<string, []>("op_2473_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2473_transpose_y_0 = const()[name = tensor<string, []>("op_2473_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_15_cast_fp16 = transpose(perm = var_2447, x = sa_v_out_15_cast_fp16)[name = tensor<string, []>("transpose_191")];
            tensor<fp16, [1, 12, 1, 64]> var_2473_cast_fp16 = matmul(transpose_x = var_2473_transpose_x_0, transpose_y = var_2473_transpose_y_0, x = probs_15_cast_fp16, y = v_t_15_cast_fp16)[name = tensor<string, []>("op_2473_cast_fp16")];
            tensor<int32, [4]> var_2478 = const()[name = tensor<string, []>("op_2478"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_2483 = const()[name = tensor<string, []>("op_2483"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_2479_cast_fp16 = transpose(perm = var_2478, x = var_2473_cast_fp16)[name = tensor<string, []>("transpose_190")];
            tensor<fp16, [1, 1, 768]> input_105_cast_fp16 = reshape(shape = var_2483, x = var_2479_cast_fp16)[name = tensor<string, []>("input_105_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_7_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_7_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54439232))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55029120))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_29_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_7_self_attention_o_net_weight_to_fp16_quantized, x = input_105_cast_fp16)[name = tensor<string, []>("linear_29_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_107_cast_fp16 = add(x = input_101_cast_fp16, y = linear_29_cast_fp16)[name = tensor<string, []>("input_107_cast_fp16")];
            tensor<int32, [1]> input_109_axes_0 = const()[name = tensor<string, []>("input_109_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_7_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_7_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55030720)))];
            tensor<fp16, []> var_2491_to_fp16 = const()[name = tensor<string, []>("op_2491_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_109_cast_fp16 = layer_norm(axes = input_109_axes_0, epsilon = var_2491_to_fp16, gamma = dec_layers_7_norm_xattn_query_weight_to_fp16, x = input_107_cast_fp16)[name = tensor<string, []>("input_109_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55032320))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55130688))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_30_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized, x = input_109_cast_fp16)[name = tensor<string, []>("linear_30_cast_fp16")];
            tensor<int32, [4]> var_2504 = const()[name = tensor<string, []>("op_2504"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_15_cast_fp16 = reshape(shape = var_2504, x = linear_30_cast_fp16)[name = tensor<string, []>("xq_proj_15_cast_fp16")];
            tensor<int32, [4]> var_2522 = const()[name = tensor<string, []>("op_2522"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_7_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_7_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_2524_transpose_x_0 = const()[name = tensor<string, []>("op_2524_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2524_transpose_y_0 = const()[name = tensor<string, []>("op_2524_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_7_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_7_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_126_perm_0 = const()[name = tensor<string, []>("transpose_126_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_127_perm_0 = const()[name = tensor<string, []>("transpose_127_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_7_to_fp16 = cast(dtype = xa_k_7_to_fp16_dtype_0, x = xa_k_7)[name = tensor<string, []>("cast_44")];
            tensor<fp16, [1, 1, 128, 256]> transpose_127 = transpose(perm = transpose_127_perm_0, x = xa_k_7_to_fp16)[name = tensor<string, []>("transpose_188")];
            tensor<fp16, [1, 1, 1, 128]> transpose_126 = transpose(perm = transpose_126_perm_0, x = xq_proj_15_cast_fp16)[name = tensor<string, []>("transpose_189")];
            tensor<fp16, [1, 1, 1, 256]> var_2524_cast_fp16 = matmul(transpose_x = var_2524_transpose_x_0, transpose_y = var_2524_transpose_y_0, x = transpose_126, y = transpose_127)[name = tensor<string, []>("op_2524_cast_fp16")];
            tensor<fp16, []> var_2525_to_fp16 = const()[name = tensor<string, []>("op_2525_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_29_cast_fp16 = mul(x = var_2524_cast_fp16, y = var_2525_to_fp16)[name = tensor<string, []>("xscores_29_cast_fp16")];
            tensor<fp16, []> var_2543_to_fp16 = const()[name = tensor<string, []>("op_2543_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_31_cast_fp16 = select(a = var_2543_to_fp16, b = xscores_29_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_31_cast_fp16")];
            tensor<int32, []> var_2545 = const()[name = tensor<string, []>("op_2545"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_15_cast_fp16 = softmax(axis = var_2545, x = xscores_31_cast_fp16)[name = tensor<string, []>("xprobs_15_cast_fp16")];
            tensor<bool, []> var_2548_transpose_x_0 = const()[name = tensor<string, []>("op_2548_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2548_transpose_y_0 = const()[name = tensor<string, []>("op_2548_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_7_to_fp16 = cast(dtype = xa_v_7_to_fp16_dtype_0, x = xa_v_7)[name = tensor<string, []>("cast_43")];
            tensor<fp16, [1, 1, 256, 128]> xvT_15_cast_fp16 = transpose(perm = var_2522, x = xa_v_7_to_fp16)[name = tensor<string, []>("transpose_187")];
            tensor<fp16, [1, 1, 1, 128]> var_2548_cast_fp16 = matmul(transpose_x = var_2548_transpose_x_0, transpose_y = var_2548_transpose_y_0, x = xprobs_15_cast_fp16, y = xvT_15_cast_fp16)[name = tensor<string, []>("op_2548_cast_fp16")];
            tensor<int32, [4]> var_2553 = const()[name = tensor<string, []>("op_2553"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_2558 = const()[name = tensor<string, []>("op_2558"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_2554_cast_fp16 = transpose(perm = var_2553, x = var_2548_cast_fp16)[name = tensor<string, []>("transpose_186")];
            tensor<fp16, [1, 1, 128]> input_111_cast_fp16 = reshape(shape = var_2558, x = var_2554_cast_fp16)[name = tensor<string, []>("input_111_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55131008))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55229376))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized, x = input_111_cast_fp16)[name = tensor<string, []>("linear_31_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_113_cast_fp16 = add(x = input_107_cast_fp16, y = linear_31_cast_fp16)[name = tensor<string, []>("input_113_cast_fp16")];
            tensor<int32, [1]> x_57_axes_0 = const()[name = tensor<string, []>("x_57_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_7_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_7_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55230976)))];
            tensor<fp16, []> var_2566_to_fp16 = const()[name = tensor<string, []>("op_2566_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_57_cast_fp16 = layer_norm(axes = x_57_axes_0, epsilon = var_2566_to_fp16, gamma = dec_layers_7_norm_pos_ff_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("x_57_cast_fp16")];
            tensor<int32, [3]> var_2582 = const()[name = tensor<string, []>("op_2582"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_29_pad_type_0 = const()[name = tensor<string, []>("y_29_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_29_strides_0 = const()[name = tensor<string, []>("y_29_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_29_pad_0 = const()[name = tensor<string, []>("y_29_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_29_dilations_0 = const()[name = tensor<string, []>("y_29_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_29_groups_0 = const()[name = tensor<string, []>("y_29_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_7_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_7_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55232576))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57591936))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_59_cast_fp16 = transpose(perm = var_2582, x = x_57_cast_fp16)[name = tensor<string, []>("transpose_185")];
            tensor<fp16, [1, 3072, 1]> y_29_cast_fp16 = conv(dilations = y_29_dilations_0, groups = y_29_groups_0, pad = y_29_pad_0, pad_type = y_29_pad_type_0, strides = y_29_strides_0, weight = dec_layers_7_pos_ff_proj_weight_to_fp16_quantized, x = x_59_cast_fp16)[name = tensor<string, []>("y_29_cast_fp16")];
            tensor<string, []> x_61_mode_0 = const()[name = tensor<string, []>("x_61_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_61_cast_fp16 = gelu(mode = x_61_mode_0, x = y_29_cast_fp16)[name = tensor<string, []>("x_61_cast_fp16")];
            tensor<string, []> y_31_pad_type_0 = const()[name = tensor<string, []>("y_31_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_31_strides_0 = const()[name = tensor<string, []>("y_31_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_31_pad_0 = const()[name = tensor<string, []>("y_31_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_31_dilations_0 = const()[name = tensor<string, []>("y_31_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_31_groups_0 = const()[name = tensor<string, []>("y_31_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57598144))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59957504))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_31_cast_fp16 = conv(dilations = y_31_dilations_0, groups = y_31_groups_0, pad = y_31_pad_0, pad_type = y_31_pad_type_0, strides = y_31_strides_0, weight = dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized, x = x_61_cast_fp16)[name = tensor<string, []>("y_31_cast_fp16")];
            tensor<int32, [3]> var_2600 = const()[name = tensor<string, []>("op_2600"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_2601_cast_fp16 = transpose(perm = var_2600, x = y_31_cast_fp16)[name = tensor<string, []>("transpose_184")];
            tensor<fp16, [1, 1, 768]> input_115_cast_fp16 = add(x = input_113_cast_fp16, y = var_2601_cast_fp16)[name = tensor<string, []>("input_115_cast_fp16")];
            tensor<int32, [1]> input_117_axes_0 = const()[name = tensor<string, []>("input_117_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_8_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_8_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59959104)))];
            tensor<fp16, []> var_2605_to_fp16 = const()[name = tensor<string, []>("op_2605_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_117_cast_fp16 = layer_norm(axes = input_117_axes_0, epsilon = var_2605_to_fp16, gamma = dec_layers_8_norm_self_weight_to_fp16, x = input_115_cast_fp16)[name = tensor<string, []>("input_117_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59960704))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61730240))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_32_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized, x = input_117_cast_fp16)[name = tensor<string, []>("linear_32_cast_fp16")];
            tensor<int32, [5]> var_2619 = const()[name = tensor<string, []>("op_2619"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_35_cast_fp16 = reshape(shape = var_2619, x = linear_32_cast_fp16)[name = tensor<string, []>("qkv_35_cast_fp16")];
            tensor<int32, [5]> q_17_begin_0 = const()[name = tensor<string, []>("q_17_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_17_end_0 = const()[name = tensor<string, []>("q_17_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_17_end_mask_0 = const()[name = tensor<string, []>("q_17_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_17_squeeze_mask_0 = const()[name = tensor<string, []>("q_17_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_17_cast_fp16 = slice_by_index(begin = q_17_begin_0, end = q_17_end_0, end_mask = q_17_end_mask_0, squeeze_mask = q_17_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor<string, []>("q_17_cast_fp16")];
            tensor<int32, [5]> new_k_17_begin_0 = const()[name = tensor<string, []>("new_k_17_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_17_end_0 = const()[name = tensor<string, []>("new_k_17_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_17_end_mask_0 = const()[name = tensor<string, []>("new_k_17_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_17_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_17_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_17_cast_fp16 = slice_by_index(begin = new_k_17_begin_0, end = new_k_17_end_0, end_mask = new_k_17_end_mask_0, squeeze_mask = new_k_17_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor<string, []>("new_k_17_cast_fp16")];
            tensor<int32, [5]> new_v_17_begin_0 = const()[name = tensor<string, []>("new_v_17_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_17_end_0 = const()[name = tensor<string, []>("new_v_17_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_17_end_mask_0 = const()[name = tensor<string, []>("new_v_17_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_17_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_17_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_17_cast_fp16 = slice_by_index(begin = new_v_17_begin_0, end = new_v_17_end_0, end_mask = new_v_17_end_mask_0, squeeze_mask = new_v_17_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor<string, []>("new_v_17_cast_fp16")];
            tensor<string, []> sa_k_in_8_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_8_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_8_to_fp16 = cast(dtype = sa_k_in_8_to_fp16_dtype_0, x = sa_k_in_8)[name = tensor<string, []>("cast_42")];
            tensor<fp16, [1, 600, 12, 64]> var_2680_cast_fp16 = mul(x = sa_k_in_8_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_2680_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_2681_cast_fp16 = mul(x = new_k_17_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_2681_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_17_cast_fp16 = add(x = var_2680_cast_fp16, y = var_2681_cast_fp16)[name = tensor<string, []>("sa_k_out_17_cast_fp16")];
            tensor<string, []> sa_k_out_17_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_17_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_8_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_8_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_8_to_fp16 = cast(dtype = sa_v_in_8_to_fp16_dtype_0, x = sa_v_in_8)[name = tensor<string, []>("cast_41")];
            tensor<fp16, [1, 600, 12, 64]> var_2687_cast_fp16 = mul(x = sa_v_in_8_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_2687_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_2688_cast_fp16 = mul(x = new_v_17_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_2688_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_17_cast_fp16 = add(x = var_2687_cast_fp16, y = var_2688_cast_fp16)[name = tensor<string, []>("sa_v_out_17_cast_fp16")];
            tensor<string, []> sa_v_out_17_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_17_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_2707 = const()[name = tensor<string, []>("op_2707"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_2709_transpose_x_0 = const()[name = tensor<string, []>("op_2709_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2709_transpose_y_0 = const()[name = tensor<string, []>("op_2709_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_128_perm_0 = const()[name = tensor<string, []>("transpose_128_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_129_perm_0 = const()[name = tensor<string, []>("transpose_129_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_129 = transpose(perm = transpose_129_perm_0, x = sa_k_out_17_cast_fp16)[name = tensor<string, []>("transpose_182")];
            tensor<fp16, [1, 12, 1, 64]> transpose_128 = transpose(perm = transpose_128_perm_0, x = q_17_cast_fp16)[name = tensor<string, []>("transpose_183")];
            tensor<fp16, [1, 12, 1, 600]> var_2709_cast_fp16 = matmul(transpose_x = var_2709_transpose_x_0, transpose_y = var_2709_transpose_y_0, x = transpose_128, y = transpose_129)[name = tensor<string, []>("op_2709_cast_fp16")];
            tensor<fp16, []> var_2710_to_fp16 = const()[name = tensor<string, []>("op_2710_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_33_cast_fp16 = mul(x = var_2709_cast_fp16, y = var_2710_to_fp16)[name = tensor<string, []>("scores_33_cast_fp16")];
            tensor<fp16, []> var_2728_to_fp16 = const()[name = tensor<string, []>("op_2728_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_35_cast_fp16 = select(a = var_2728_to_fp16, b = scores_33_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_35_cast_fp16")];
            tensor<int32, []> var_2730 = const()[name = tensor<string, []>("op_2730"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_17_cast_fp16 = softmax(axis = var_2730, x = scores_35_cast_fp16)[name = tensor<string, []>("probs_17_cast_fp16")];
            tensor<bool, []> var_2733_transpose_x_0 = const()[name = tensor<string, []>("op_2733_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2733_transpose_y_0 = const()[name = tensor<string, []>("op_2733_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_17_cast_fp16 = transpose(perm = var_2707, x = sa_v_out_17_cast_fp16)[name = tensor<string, []>("transpose_181")];
            tensor<fp16, [1, 12, 1, 64]> var_2733_cast_fp16 = matmul(transpose_x = var_2733_transpose_x_0, transpose_y = var_2733_transpose_y_0, x = probs_17_cast_fp16, y = v_t_17_cast_fp16)[name = tensor<string, []>("op_2733_cast_fp16")];
            tensor<int32, [4]> var_2738 = const()[name = tensor<string, []>("op_2738"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_2743 = const()[name = tensor<string, []>("op_2743"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_2739_cast_fp16 = transpose(perm = var_2738, x = var_2733_cast_fp16)[name = tensor<string, []>("transpose_180")];
            tensor<fp16, [1, 1, 768]> input_119_cast_fp16 = reshape(shape = var_2743, x = var_2739_cast_fp16)[name = tensor<string, []>("input_119_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_8_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_8_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61734912))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62324800))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_33_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_8_self_attention_o_net_weight_to_fp16_quantized, x = input_119_cast_fp16)[name = tensor<string, []>("linear_33_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_121_cast_fp16 = add(x = input_115_cast_fp16, y = linear_33_cast_fp16)[name = tensor<string, []>("input_121_cast_fp16")];
            tensor<int32, [1]> input_123_axes_0 = const()[name = tensor<string, []>("input_123_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_8_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_8_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62326400)))];
            tensor<fp16, []> var_2751_to_fp16 = const()[name = tensor<string, []>("op_2751_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_123_cast_fp16 = layer_norm(axes = input_123_axes_0, epsilon = var_2751_to_fp16, gamma = dec_layers_8_norm_xattn_query_weight_to_fp16, x = input_121_cast_fp16)[name = tensor<string, []>("input_123_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62328000))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62426368))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_34_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized, x = input_123_cast_fp16)[name = tensor<string, []>("linear_34_cast_fp16")];
            tensor<int32, [4]> var_2764 = const()[name = tensor<string, []>("op_2764"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_17_cast_fp16 = reshape(shape = var_2764, x = linear_34_cast_fp16)[name = tensor<string, []>("xq_proj_17_cast_fp16")];
            tensor<int32, [4]> var_2782 = const()[name = tensor<string, []>("op_2782"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_8_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_8_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_2784_transpose_x_0 = const()[name = tensor<string, []>("op_2784_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2784_transpose_y_0 = const()[name = tensor<string, []>("op_2784_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_8_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_8_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_130_perm_0 = const()[name = tensor<string, []>("transpose_130_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_131_perm_0 = const()[name = tensor<string, []>("transpose_131_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_8_to_fp16 = cast(dtype = xa_k_8_to_fp16_dtype_0, x = xa_k_8)[name = tensor<string, []>("cast_40")];
            tensor<fp16, [1, 1, 128, 256]> transpose_131 = transpose(perm = transpose_131_perm_0, x = xa_k_8_to_fp16)[name = tensor<string, []>("transpose_178")];
            tensor<fp16, [1, 1, 1, 128]> transpose_130 = transpose(perm = transpose_130_perm_0, x = xq_proj_17_cast_fp16)[name = tensor<string, []>("transpose_179")];
            tensor<fp16, [1, 1, 1, 256]> var_2784_cast_fp16 = matmul(transpose_x = var_2784_transpose_x_0, transpose_y = var_2784_transpose_y_0, x = transpose_130, y = transpose_131)[name = tensor<string, []>("op_2784_cast_fp16")];
            tensor<fp16, []> var_2785_to_fp16 = const()[name = tensor<string, []>("op_2785_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_33_cast_fp16 = mul(x = var_2784_cast_fp16, y = var_2785_to_fp16)[name = tensor<string, []>("xscores_33_cast_fp16")];
            tensor<fp16, []> var_2803_to_fp16 = const()[name = tensor<string, []>("op_2803_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_35_cast_fp16 = select(a = var_2803_to_fp16, b = xscores_33_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_35_cast_fp16")];
            tensor<int32, []> var_2805 = const()[name = tensor<string, []>("op_2805"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_17_cast_fp16 = softmax(axis = var_2805, x = xscores_35_cast_fp16)[name = tensor<string, []>("xprobs_17_cast_fp16")];
            tensor<bool, []> var_2808_transpose_x_0 = const()[name = tensor<string, []>("op_2808_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2808_transpose_y_0 = const()[name = tensor<string, []>("op_2808_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_8_to_fp16 = cast(dtype = xa_v_8_to_fp16_dtype_0, x = xa_v_8)[name = tensor<string, []>("cast_39")];
            tensor<fp16, [1, 1, 256, 128]> xvT_17_cast_fp16 = transpose(perm = var_2782, x = xa_v_8_to_fp16)[name = tensor<string, []>("transpose_177")];
            tensor<fp16, [1, 1, 1, 128]> var_2808_cast_fp16 = matmul(transpose_x = var_2808_transpose_x_0, transpose_y = var_2808_transpose_y_0, x = xprobs_17_cast_fp16, y = xvT_17_cast_fp16)[name = tensor<string, []>("op_2808_cast_fp16")];
            tensor<int32, [4]> var_2813 = const()[name = tensor<string, []>("op_2813"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_2818 = const()[name = tensor<string, []>("op_2818"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_2814_cast_fp16 = transpose(perm = var_2813, x = var_2808_cast_fp16)[name = tensor<string, []>("transpose_176")];
            tensor<fp16, [1, 1, 128]> input_125_cast_fp16 = reshape(shape = var_2818, x = var_2814_cast_fp16)[name = tensor<string, []>("input_125_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62426688))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62525056))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_35_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized, x = input_125_cast_fp16)[name = tensor<string, []>("linear_35_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_127_cast_fp16 = add(x = input_121_cast_fp16, y = linear_35_cast_fp16)[name = tensor<string, []>("input_127_cast_fp16")];
            tensor<int32, [1]> x_65_axes_0 = const()[name = tensor<string, []>("x_65_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_8_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_8_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62526656)))];
            tensor<fp16, []> var_2826_to_fp16 = const()[name = tensor<string, []>("op_2826_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_65_cast_fp16 = layer_norm(axes = x_65_axes_0, epsilon = var_2826_to_fp16, gamma = dec_layers_8_norm_pos_ff_weight_to_fp16, x = input_127_cast_fp16)[name = tensor<string, []>("x_65_cast_fp16")];
            tensor<int32, [3]> var_2842 = const()[name = tensor<string, []>("op_2842"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_33_pad_type_0 = const()[name = tensor<string, []>("y_33_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_33_strides_0 = const()[name = tensor<string, []>("y_33_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_33_pad_0 = const()[name = tensor<string, []>("y_33_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_33_dilations_0 = const()[name = tensor<string, []>("y_33_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_33_groups_0 = const()[name = tensor<string, []>("y_33_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_8_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_8_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62528256))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64887616))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_67_cast_fp16 = transpose(perm = var_2842, x = x_65_cast_fp16)[name = tensor<string, []>("transpose_175")];
            tensor<fp16, [1, 3072, 1]> y_33_cast_fp16 = conv(dilations = y_33_dilations_0, groups = y_33_groups_0, pad = y_33_pad_0, pad_type = y_33_pad_type_0, strides = y_33_strides_0, weight = dec_layers_8_pos_ff_proj_weight_to_fp16_quantized, x = x_67_cast_fp16)[name = tensor<string, []>("y_33_cast_fp16")];
            tensor<string, []> x_69_mode_0 = const()[name = tensor<string, []>("x_69_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_69_cast_fp16 = gelu(mode = x_69_mode_0, x = y_33_cast_fp16)[name = tensor<string, []>("x_69_cast_fp16")];
            tensor<string, []> y_35_pad_type_0 = const()[name = tensor<string, []>("y_35_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_35_strides_0 = const()[name = tensor<string, []>("y_35_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_35_pad_0 = const()[name = tensor<string, []>("y_35_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_35_dilations_0 = const()[name = tensor<string, []>("y_35_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_35_groups_0 = const()[name = tensor<string, []>("y_35_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64893824))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67253184))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_35_cast_fp16 = conv(dilations = y_35_dilations_0, groups = y_35_groups_0, pad = y_35_pad_0, pad_type = y_35_pad_type_0, strides = y_35_strides_0, weight = dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized, x = x_69_cast_fp16)[name = tensor<string, []>("y_35_cast_fp16")];
            tensor<int32, [3]> var_2860 = const()[name = tensor<string, []>("op_2860"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_2861_cast_fp16 = transpose(perm = var_2860, x = y_35_cast_fp16)[name = tensor<string, []>("transpose_174")];
            tensor<fp16, [1, 1, 768]> input_129_cast_fp16 = add(x = input_127_cast_fp16, y = var_2861_cast_fp16)[name = tensor<string, []>("input_129_cast_fp16")];
            tensor<int32, [1]> input_131_axes_0 = const()[name = tensor<string, []>("input_131_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_9_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_9_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67254784)))];
            tensor<fp16, []> var_2865_to_fp16 = const()[name = tensor<string, []>("op_2865_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_131_cast_fp16 = layer_norm(axes = input_131_axes_0, epsilon = var_2865_to_fp16, gamma = dec_layers_9_norm_self_weight_to_fp16, x = input_129_cast_fp16)[name = tensor<string, []>("input_131_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67256384))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69025920))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_36_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized, x = input_131_cast_fp16)[name = tensor<string, []>("linear_36_cast_fp16")];
            tensor<int32, [5]> var_2879 = const()[name = tensor<string, []>("op_2879"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_39_cast_fp16 = reshape(shape = var_2879, x = linear_36_cast_fp16)[name = tensor<string, []>("qkv_39_cast_fp16")];
            tensor<int32, [5]> q_19_begin_0 = const()[name = tensor<string, []>("q_19_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_19_end_0 = const()[name = tensor<string, []>("q_19_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_19_end_mask_0 = const()[name = tensor<string, []>("q_19_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_19_squeeze_mask_0 = const()[name = tensor<string, []>("q_19_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_19_cast_fp16 = slice_by_index(begin = q_19_begin_0, end = q_19_end_0, end_mask = q_19_end_mask_0, squeeze_mask = q_19_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor<string, []>("q_19_cast_fp16")];
            tensor<int32, [5]> new_k_19_begin_0 = const()[name = tensor<string, []>("new_k_19_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_19_end_0 = const()[name = tensor<string, []>("new_k_19_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_19_end_mask_0 = const()[name = tensor<string, []>("new_k_19_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_19_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_19_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_19_cast_fp16 = slice_by_index(begin = new_k_19_begin_0, end = new_k_19_end_0, end_mask = new_k_19_end_mask_0, squeeze_mask = new_k_19_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor<string, []>("new_k_19_cast_fp16")];
            tensor<int32, [5]> new_v_19_begin_0 = const()[name = tensor<string, []>("new_v_19_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_19_end_0 = const()[name = tensor<string, []>("new_v_19_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_19_end_mask_0 = const()[name = tensor<string, []>("new_v_19_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_19_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_19_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_19_cast_fp16 = slice_by_index(begin = new_v_19_begin_0, end = new_v_19_end_0, end_mask = new_v_19_end_mask_0, squeeze_mask = new_v_19_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor<string, []>("new_v_19_cast_fp16")];
            tensor<string, []> sa_k_in_9_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_9_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_9_to_fp16 = cast(dtype = sa_k_in_9_to_fp16_dtype_0, x = sa_k_in_9)[name = tensor<string, []>("cast_38")];
            tensor<fp16, [1, 600, 12, 64]> var_2940_cast_fp16 = mul(x = sa_k_in_9_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_2940_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_2941_cast_fp16 = mul(x = new_k_19_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_2941_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_19_cast_fp16 = add(x = var_2940_cast_fp16, y = var_2941_cast_fp16)[name = tensor<string, []>("sa_k_out_19_cast_fp16")];
            tensor<string, []> sa_k_out_19_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_19_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_9_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_9_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_9_to_fp16 = cast(dtype = sa_v_in_9_to_fp16_dtype_0, x = sa_v_in_9)[name = tensor<string, []>("cast_37")];
            tensor<fp16, [1, 600, 12, 64]> var_2947_cast_fp16 = mul(x = sa_v_in_9_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_2947_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_2948_cast_fp16 = mul(x = new_v_19_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_2948_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_19_cast_fp16 = add(x = var_2947_cast_fp16, y = var_2948_cast_fp16)[name = tensor<string, []>("sa_v_out_19_cast_fp16")];
            tensor<string, []> sa_v_out_19_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_19_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_2967 = const()[name = tensor<string, []>("op_2967"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_2969_transpose_x_0 = const()[name = tensor<string, []>("op_2969_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2969_transpose_y_0 = const()[name = tensor<string, []>("op_2969_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_132_perm_0 = const()[name = tensor<string, []>("transpose_132_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_133_perm_0 = const()[name = tensor<string, []>("transpose_133_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_133 = transpose(perm = transpose_133_perm_0, x = sa_k_out_19_cast_fp16)[name = tensor<string, []>("transpose_172")];
            tensor<fp16, [1, 12, 1, 64]> transpose_132 = transpose(perm = transpose_132_perm_0, x = q_19_cast_fp16)[name = tensor<string, []>("transpose_173")];
            tensor<fp16, [1, 12, 1, 600]> var_2969_cast_fp16 = matmul(transpose_x = var_2969_transpose_x_0, transpose_y = var_2969_transpose_y_0, x = transpose_132, y = transpose_133)[name = tensor<string, []>("op_2969_cast_fp16")];
            tensor<fp16, []> var_2970_to_fp16 = const()[name = tensor<string, []>("op_2970_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_37_cast_fp16 = mul(x = var_2969_cast_fp16, y = var_2970_to_fp16)[name = tensor<string, []>("scores_37_cast_fp16")];
            tensor<fp16, []> var_2988_to_fp16 = const()[name = tensor<string, []>("op_2988_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_39_cast_fp16 = select(a = var_2988_to_fp16, b = scores_37_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_39_cast_fp16")];
            tensor<int32, []> var_2990 = const()[name = tensor<string, []>("op_2990"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_19_cast_fp16 = softmax(axis = var_2990, x = scores_39_cast_fp16)[name = tensor<string, []>("probs_19_cast_fp16")];
            tensor<bool, []> var_2993_transpose_x_0 = const()[name = tensor<string, []>("op_2993_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2993_transpose_y_0 = const()[name = tensor<string, []>("op_2993_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_19_cast_fp16 = transpose(perm = var_2967, x = sa_v_out_19_cast_fp16)[name = tensor<string, []>("transpose_171")];
            tensor<fp16, [1, 12, 1, 64]> var_2993_cast_fp16 = matmul(transpose_x = var_2993_transpose_x_0, transpose_y = var_2993_transpose_y_0, x = probs_19_cast_fp16, y = v_t_19_cast_fp16)[name = tensor<string, []>("op_2993_cast_fp16")];
            tensor<int32, [4]> var_2998 = const()[name = tensor<string, []>("op_2998"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_3003 = const()[name = tensor<string, []>("op_3003"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_2999_cast_fp16 = transpose(perm = var_2998, x = var_2993_cast_fp16)[name = tensor<string, []>("transpose_170")];
            tensor<fp16, [1, 1, 768]> input_133_cast_fp16 = reshape(shape = var_3003, x = var_2999_cast_fp16)[name = tensor<string, []>("input_133_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_9_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_9_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69030592))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69620480))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_9_self_attention_o_net_weight_to_fp16_quantized, x = input_133_cast_fp16)[name = tensor<string, []>("linear_37_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_135_cast_fp16 = add(x = input_129_cast_fp16, y = linear_37_cast_fp16)[name = tensor<string, []>("input_135_cast_fp16")];
            tensor<int32, [1]> input_137_axes_0 = const()[name = tensor<string, []>("input_137_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_9_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_9_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69622080)))];
            tensor<fp16, []> var_3011_to_fp16 = const()[name = tensor<string, []>("op_3011_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_137_cast_fp16 = layer_norm(axes = input_137_axes_0, epsilon = var_3011_to_fp16, gamma = dec_layers_9_norm_xattn_query_weight_to_fp16, x = input_135_cast_fp16)[name = tensor<string, []>("input_137_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69623680))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69722048))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_38_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized, x = input_137_cast_fp16)[name = tensor<string, []>("linear_38_cast_fp16")];
            tensor<int32, [4]> var_3024 = const()[name = tensor<string, []>("op_3024"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_19_cast_fp16 = reshape(shape = var_3024, x = linear_38_cast_fp16)[name = tensor<string, []>("xq_proj_19_cast_fp16")];
            tensor<int32, [4]> var_3042 = const()[name = tensor<string, []>("op_3042"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_9_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_9_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_3044_transpose_x_0 = const()[name = tensor<string, []>("op_3044_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_3044_transpose_y_0 = const()[name = tensor<string, []>("op_3044_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_9_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_9_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_134_perm_0 = const()[name = tensor<string, []>("transpose_134_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_135_perm_0 = const()[name = tensor<string, []>("transpose_135_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_9_to_fp16 = cast(dtype = xa_k_9_to_fp16_dtype_0, x = xa_k_9)[name = tensor<string, []>("cast_36")];
            tensor<fp16, [1, 1, 128, 256]> transpose_135 = transpose(perm = transpose_135_perm_0, x = xa_k_9_to_fp16)[name = tensor<string, []>("transpose_168")];
            tensor<fp16, [1, 1, 1, 128]> transpose_134 = transpose(perm = transpose_134_perm_0, x = xq_proj_19_cast_fp16)[name = tensor<string, []>("transpose_169")];
            tensor<fp16, [1, 1, 1, 256]> var_3044_cast_fp16 = matmul(transpose_x = var_3044_transpose_x_0, transpose_y = var_3044_transpose_y_0, x = transpose_134, y = transpose_135)[name = tensor<string, []>("op_3044_cast_fp16")];
            tensor<fp16, []> var_3045_to_fp16 = const()[name = tensor<string, []>("op_3045_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_37_cast_fp16 = mul(x = var_3044_cast_fp16, y = var_3045_to_fp16)[name = tensor<string, []>("xscores_37_cast_fp16")];
            tensor<fp16, []> var_3063_to_fp16 = const()[name = tensor<string, []>("op_3063_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_39_cast_fp16 = select(a = var_3063_to_fp16, b = xscores_37_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_39_cast_fp16")];
            tensor<int32, []> var_3065 = const()[name = tensor<string, []>("op_3065"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_19_cast_fp16 = softmax(axis = var_3065, x = xscores_39_cast_fp16)[name = tensor<string, []>("xprobs_19_cast_fp16")];
            tensor<bool, []> var_3068_transpose_x_0 = const()[name = tensor<string, []>("op_3068_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_3068_transpose_y_0 = const()[name = tensor<string, []>("op_3068_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_9_to_fp16 = cast(dtype = xa_v_9_to_fp16_dtype_0, x = xa_v_9)[name = tensor<string, []>("cast_35")];
            tensor<fp16, [1, 1, 256, 128]> xvT_19_cast_fp16 = transpose(perm = var_3042, x = xa_v_9_to_fp16)[name = tensor<string, []>("transpose_167")];
            tensor<fp16, [1, 1, 1, 128]> var_3068_cast_fp16 = matmul(transpose_x = var_3068_transpose_x_0, transpose_y = var_3068_transpose_y_0, x = xprobs_19_cast_fp16, y = xvT_19_cast_fp16)[name = tensor<string, []>("op_3068_cast_fp16")];
            tensor<int32, [4]> var_3073 = const()[name = tensor<string, []>("op_3073"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_3078 = const()[name = tensor<string, []>("op_3078"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_3074_cast_fp16 = transpose(perm = var_3073, x = var_3068_cast_fp16)[name = tensor<string, []>("transpose_166")];
            tensor<fp16, [1, 1, 128]> input_139_cast_fp16 = reshape(shape = var_3078, x = var_3074_cast_fp16)[name = tensor<string, []>("input_139_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69722368))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69820736))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_39_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized, x = input_139_cast_fp16)[name = tensor<string, []>("linear_39_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_141_cast_fp16 = add(x = input_135_cast_fp16, y = linear_39_cast_fp16)[name = tensor<string, []>("input_141_cast_fp16")];
            tensor<int32, [1]> x_73_axes_0 = const()[name = tensor<string, []>("x_73_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_9_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_9_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69822336)))];
            tensor<fp16, []> var_3086_to_fp16 = const()[name = tensor<string, []>("op_3086_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_73_cast_fp16 = layer_norm(axes = x_73_axes_0, epsilon = var_3086_to_fp16, gamma = dec_layers_9_norm_pos_ff_weight_to_fp16, x = input_141_cast_fp16)[name = tensor<string, []>("x_73_cast_fp16")];
            tensor<int32, [3]> var_3102 = const()[name = tensor<string, []>("op_3102"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_37_pad_type_0 = const()[name = tensor<string, []>("y_37_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_37_strides_0 = const()[name = tensor<string, []>("y_37_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_37_pad_0 = const()[name = tensor<string, []>("y_37_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_37_dilations_0 = const()[name = tensor<string, []>("y_37_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_37_groups_0 = const()[name = tensor<string, []>("y_37_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_9_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_9_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69823936))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(72183296))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_75_cast_fp16 = transpose(perm = var_3102, x = x_73_cast_fp16)[name = tensor<string, []>("transpose_165")];
            tensor<fp16, [1, 3072, 1]> y_37_cast_fp16 = conv(dilations = y_37_dilations_0, groups = y_37_groups_0, pad = y_37_pad_0, pad_type = y_37_pad_type_0, strides = y_37_strides_0, weight = dec_layers_9_pos_ff_proj_weight_to_fp16_quantized, x = x_75_cast_fp16)[name = tensor<string, []>("y_37_cast_fp16")];
            tensor<string, []> x_77_mode_0 = const()[name = tensor<string, []>("x_77_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_77_cast_fp16 = gelu(mode = x_77_mode_0, x = y_37_cast_fp16)[name = tensor<string, []>("x_77_cast_fp16")];
            tensor<string, []> y_39_pad_type_0 = const()[name = tensor<string, []>("y_39_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_39_strides_0 = const()[name = tensor<string, []>("y_39_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_39_pad_0 = const()[name = tensor<string, []>("y_39_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_39_dilations_0 = const()[name = tensor<string, []>("y_39_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_39_groups_0 = const()[name = tensor<string, []>("y_39_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(72189504))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(74548864))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_39_cast_fp16 = conv(dilations = y_39_dilations_0, groups = y_39_groups_0, pad = y_39_pad_0, pad_type = y_39_pad_type_0, strides = y_39_strides_0, weight = dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized, x = x_77_cast_fp16)[name = tensor<string, []>("y_39_cast_fp16")];
            tensor<int32, [3]> var_3120 = const()[name = tensor<string, []>("op_3120"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_3121_cast_fp16 = transpose(perm = var_3120, x = y_39_cast_fp16)[name = tensor<string, []>("transpose_164")];
            tensor<fp16, [1, 1, 768]> input_143_cast_fp16 = add(x = input_141_cast_fp16, y = var_3121_cast_fp16)[name = tensor<string, []>("input_143_cast_fp16")];
            tensor<int32, [1]> input_145_axes_0 = const()[name = tensor<string, []>("input_145_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_10_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_10_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(74550464)))];
            tensor<fp16, []> var_3125_to_fp16 = const()[name = tensor<string, []>("op_3125_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_145_cast_fp16 = layer_norm(axes = input_145_axes_0, epsilon = var_3125_to_fp16, gamma = dec_layers_10_norm_self_weight_to_fp16, x = input_143_cast_fp16)[name = tensor<string, []>("input_145_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(74552064))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(76321600))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_40_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized, x = input_145_cast_fp16)[name = tensor<string, []>("linear_40_cast_fp16")];
            tensor<int32, [5]> var_3139 = const()[name = tensor<string, []>("op_3139"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_43_cast_fp16 = reshape(shape = var_3139, x = linear_40_cast_fp16)[name = tensor<string, []>("qkv_43_cast_fp16")];
            tensor<int32, [5]> q_21_begin_0 = const()[name = tensor<string, []>("q_21_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_21_end_0 = const()[name = tensor<string, []>("q_21_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_21_end_mask_0 = const()[name = tensor<string, []>("q_21_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_21_squeeze_mask_0 = const()[name = tensor<string, []>("q_21_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_21_cast_fp16 = slice_by_index(begin = q_21_begin_0, end = q_21_end_0, end_mask = q_21_end_mask_0, squeeze_mask = q_21_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor<string, []>("q_21_cast_fp16")];
            tensor<int32, [5]> new_k_21_begin_0 = const()[name = tensor<string, []>("new_k_21_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_21_end_0 = const()[name = tensor<string, []>("new_k_21_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_21_end_mask_0 = const()[name = tensor<string, []>("new_k_21_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_21_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_21_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_21_cast_fp16 = slice_by_index(begin = new_k_21_begin_0, end = new_k_21_end_0, end_mask = new_k_21_end_mask_0, squeeze_mask = new_k_21_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor<string, []>("new_k_21_cast_fp16")];
            tensor<int32, [5]> new_v_21_begin_0 = const()[name = tensor<string, []>("new_v_21_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_21_end_0 = const()[name = tensor<string, []>("new_v_21_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_21_end_mask_0 = const()[name = tensor<string, []>("new_v_21_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_21_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_21_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_21_cast_fp16 = slice_by_index(begin = new_v_21_begin_0, end = new_v_21_end_0, end_mask = new_v_21_end_mask_0, squeeze_mask = new_v_21_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor<string, []>("new_v_21_cast_fp16")];
            tensor<string, []> sa_k_in_10_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_10_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_10_to_fp16 = cast(dtype = sa_k_in_10_to_fp16_dtype_0, x = sa_k_in_10)[name = tensor<string, []>("cast_34")];
            tensor<fp16, [1, 600, 12, 64]> var_3200_cast_fp16 = mul(x = sa_k_in_10_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_3200_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_3201_cast_fp16 = mul(x = new_k_21_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_3201_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_21_cast_fp16 = add(x = var_3200_cast_fp16, y = var_3201_cast_fp16)[name = tensor<string, []>("sa_k_out_21_cast_fp16")];
            tensor<string, []> sa_k_out_21_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_21_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_10_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_10_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_10_to_fp16 = cast(dtype = sa_v_in_10_to_fp16_dtype_0, x = sa_v_in_10)[name = tensor<string, []>("cast_33")];
            tensor<fp16, [1, 600, 12, 64]> var_3207_cast_fp16 = mul(x = sa_v_in_10_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_3207_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_3208_cast_fp16 = mul(x = new_v_21_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_3208_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_21_cast_fp16 = add(x = var_3207_cast_fp16, y = var_3208_cast_fp16)[name = tensor<string, []>("sa_v_out_21_cast_fp16")];
            tensor<string, []> sa_v_out_21_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_21_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_3227 = const()[name = tensor<string, []>("op_3227"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_3229_transpose_x_0 = const()[name = tensor<string, []>("op_3229_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_3229_transpose_y_0 = const()[name = tensor<string, []>("op_3229_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_136_perm_0 = const()[name = tensor<string, []>("transpose_136_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_137_perm_0 = const()[name = tensor<string, []>("transpose_137_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_137 = transpose(perm = transpose_137_perm_0, x = sa_k_out_21_cast_fp16)[name = tensor<string, []>("transpose_162")];
            tensor<fp16, [1, 12, 1, 64]> transpose_136 = transpose(perm = transpose_136_perm_0, x = q_21_cast_fp16)[name = tensor<string, []>("transpose_163")];
            tensor<fp16, [1, 12, 1, 600]> var_3229_cast_fp16 = matmul(transpose_x = var_3229_transpose_x_0, transpose_y = var_3229_transpose_y_0, x = transpose_136, y = transpose_137)[name = tensor<string, []>("op_3229_cast_fp16")];
            tensor<fp16, []> var_3230_to_fp16 = const()[name = tensor<string, []>("op_3230_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_41_cast_fp16 = mul(x = var_3229_cast_fp16, y = var_3230_to_fp16)[name = tensor<string, []>("scores_41_cast_fp16")];
            tensor<fp16, []> var_3248_to_fp16 = const()[name = tensor<string, []>("op_3248_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_43_cast_fp16 = select(a = var_3248_to_fp16, b = scores_41_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_43_cast_fp16")];
            tensor<int32, []> var_3250 = const()[name = tensor<string, []>("op_3250"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_21_cast_fp16 = softmax(axis = var_3250, x = scores_43_cast_fp16)[name = tensor<string, []>("probs_21_cast_fp16")];
            tensor<bool, []> var_3253_transpose_x_0 = const()[name = tensor<string, []>("op_3253_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_3253_transpose_y_0 = const()[name = tensor<string, []>("op_3253_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_21_cast_fp16 = transpose(perm = var_3227, x = sa_v_out_21_cast_fp16)[name = tensor<string, []>("transpose_161")];
            tensor<fp16, [1, 12, 1, 64]> var_3253_cast_fp16 = matmul(transpose_x = var_3253_transpose_x_0, transpose_y = var_3253_transpose_y_0, x = probs_21_cast_fp16, y = v_t_21_cast_fp16)[name = tensor<string, []>("op_3253_cast_fp16")];
            tensor<int32, [4]> var_3258 = const()[name = tensor<string, []>("op_3258"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_3263 = const()[name = tensor<string, []>("op_3263"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_3259_cast_fp16 = transpose(perm = var_3258, x = var_3253_cast_fp16)[name = tensor<string, []>("transpose_160")];
            tensor<fp16, [1, 1, 768]> input_147_cast_fp16 = reshape(shape = var_3263, x = var_3259_cast_fp16)[name = tensor<string, []>("input_147_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_10_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_10_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(76326272))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(76916160))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_10_self_attention_o_net_weight_to_fp16_quantized, x = input_147_cast_fp16)[name = tensor<string, []>("linear_41_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_149_cast_fp16 = add(x = input_143_cast_fp16, y = linear_41_cast_fp16)[name = tensor<string, []>("input_149_cast_fp16")];
            tensor<int32, [1]> input_151_axes_0 = const()[name = tensor<string, []>("input_151_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_10_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_10_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(76917760)))];
            tensor<fp16, []> var_3271_to_fp16 = const()[name = tensor<string, []>("op_3271_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_151_cast_fp16 = layer_norm(axes = input_151_axes_0, epsilon = var_3271_to_fp16, gamma = dec_layers_10_norm_xattn_query_weight_to_fp16, x = input_149_cast_fp16)[name = tensor<string, []>("input_151_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(76919360))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77017728))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_42_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized, x = input_151_cast_fp16)[name = tensor<string, []>("linear_42_cast_fp16")];
            tensor<int32, [4]> var_3284 = const()[name = tensor<string, []>("op_3284"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_21_cast_fp16 = reshape(shape = var_3284, x = linear_42_cast_fp16)[name = tensor<string, []>("xq_proj_21_cast_fp16")];
            tensor<int32, [4]> var_3302 = const()[name = tensor<string, []>("op_3302"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_10_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_10_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_3304_transpose_x_0 = const()[name = tensor<string, []>("op_3304_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_3304_transpose_y_0 = const()[name = tensor<string, []>("op_3304_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_10_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_10_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_138_perm_0 = const()[name = tensor<string, []>("transpose_138_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_139_perm_0 = const()[name = tensor<string, []>("transpose_139_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_10_to_fp16 = cast(dtype = xa_k_10_to_fp16_dtype_0, x = xa_k_10)[name = tensor<string, []>("cast_32")];
            tensor<fp16, [1, 1, 128, 256]> transpose_139 = transpose(perm = transpose_139_perm_0, x = xa_k_10_to_fp16)[name = tensor<string, []>("transpose_158")];
            tensor<fp16, [1, 1, 1, 128]> transpose_138 = transpose(perm = transpose_138_perm_0, x = xq_proj_21_cast_fp16)[name = tensor<string, []>("transpose_159")];
            tensor<fp16, [1, 1, 1, 256]> var_3304_cast_fp16 = matmul(transpose_x = var_3304_transpose_x_0, transpose_y = var_3304_transpose_y_0, x = transpose_138, y = transpose_139)[name = tensor<string, []>("op_3304_cast_fp16")];
            tensor<fp16, []> var_3305_to_fp16 = const()[name = tensor<string, []>("op_3305_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_41_cast_fp16 = mul(x = var_3304_cast_fp16, y = var_3305_to_fp16)[name = tensor<string, []>("xscores_41_cast_fp16")];
            tensor<fp16, []> var_3323_to_fp16 = const()[name = tensor<string, []>("op_3323_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_43_cast_fp16 = select(a = var_3323_to_fp16, b = xscores_41_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_43_cast_fp16")];
            tensor<int32, []> var_3325 = const()[name = tensor<string, []>("op_3325"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_21_cast_fp16 = softmax(axis = var_3325, x = xscores_43_cast_fp16)[name = tensor<string, []>("xprobs_21_cast_fp16")];
            tensor<bool, []> var_3328_transpose_x_0 = const()[name = tensor<string, []>("op_3328_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_3328_transpose_y_0 = const()[name = tensor<string, []>("op_3328_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_10_to_fp16 = cast(dtype = xa_v_10_to_fp16_dtype_0, x = xa_v_10)[name = tensor<string, []>("cast_31")];
            tensor<fp16, [1, 1, 256, 128]> xvT_21_cast_fp16 = transpose(perm = var_3302, x = xa_v_10_to_fp16)[name = tensor<string, []>("transpose_157")];
            tensor<fp16, [1, 1, 1, 128]> var_3328_cast_fp16 = matmul(transpose_x = var_3328_transpose_x_0, transpose_y = var_3328_transpose_y_0, x = xprobs_21_cast_fp16, y = xvT_21_cast_fp16)[name = tensor<string, []>("op_3328_cast_fp16")];
            tensor<int32, [4]> var_3333 = const()[name = tensor<string, []>("op_3333"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_3338 = const()[name = tensor<string, []>("op_3338"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_3334_cast_fp16 = transpose(perm = var_3333, x = var_3328_cast_fp16)[name = tensor<string, []>("transpose_156")];
            tensor<fp16, [1, 1, 128]> input_153_cast_fp16 = reshape(shape = var_3338, x = var_3334_cast_fp16)[name = tensor<string, []>("input_153_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77018048))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77116416))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized, x = input_153_cast_fp16)[name = tensor<string, []>("linear_43_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_155_cast_fp16 = add(x = input_149_cast_fp16, y = linear_43_cast_fp16)[name = tensor<string, []>("input_155_cast_fp16")];
            tensor<int32, [1]> x_81_axes_0 = const()[name = tensor<string, []>("x_81_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_10_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_10_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77118016)))];
            tensor<fp16, []> var_3346_to_fp16 = const()[name = tensor<string, []>("op_3346_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_81_cast_fp16 = layer_norm(axes = x_81_axes_0, epsilon = var_3346_to_fp16, gamma = dec_layers_10_norm_pos_ff_weight_to_fp16, x = input_155_cast_fp16)[name = tensor<string, []>("x_81_cast_fp16")];
            tensor<int32, [3]> var_3362 = const()[name = tensor<string, []>("op_3362"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_41_pad_type_0 = const()[name = tensor<string, []>("y_41_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_41_strides_0 = const()[name = tensor<string, []>("y_41_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_41_pad_0 = const()[name = tensor<string, []>("y_41_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_41_dilations_0 = const()[name = tensor<string, []>("y_41_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_41_groups_0 = const()[name = tensor<string, []>("y_41_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_10_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_10_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77119616))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(79478976))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_83_cast_fp16 = transpose(perm = var_3362, x = x_81_cast_fp16)[name = tensor<string, []>("transpose_155")];
            tensor<fp16, [1, 3072, 1]> y_41_cast_fp16 = conv(dilations = y_41_dilations_0, groups = y_41_groups_0, pad = y_41_pad_0, pad_type = y_41_pad_type_0, strides = y_41_strides_0, weight = dec_layers_10_pos_ff_proj_weight_to_fp16_quantized, x = x_83_cast_fp16)[name = tensor<string, []>("y_41_cast_fp16")];
            tensor<string, []> x_85_mode_0 = const()[name = tensor<string, []>("x_85_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_85_cast_fp16 = gelu(mode = x_85_mode_0, x = y_41_cast_fp16)[name = tensor<string, []>("x_85_cast_fp16")];
            tensor<string, []> y_43_pad_type_0 = const()[name = tensor<string, []>("y_43_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_43_strides_0 = const()[name = tensor<string, []>("y_43_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_43_pad_0 = const()[name = tensor<string, []>("y_43_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_43_dilations_0 = const()[name = tensor<string, []>("y_43_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_43_groups_0 = const()[name = tensor<string, []>("y_43_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(79485184))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81844544))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_43_cast_fp16 = conv(dilations = y_43_dilations_0, groups = y_43_groups_0, pad = y_43_pad_0, pad_type = y_43_pad_type_0, strides = y_43_strides_0, weight = dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized, x = x_85_cast_fp16)[name = tensor<string, []>("y_43_cast_fp16")];
            tensor<int32, [3]> var_3380 = const()[name = tensor<string, []>("op_3380"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_3381_cast_fp16 = transpose(perm = var_3380, x = y_43_cast_fp16)[name = tensor<string, []>("transpose_154")];
            tensor<fp16, [1, 1, 768]> input_157_cast_fp16 = add(x = input_155_cast_fp16, y = var_3381_cast_fp16)[name = tensor<string, []>("input_157_cast_fp16")];
            tensor<int32, [1]> input_159_axes_0 = const()[name = tensor<string, []>("input_159_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_11_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_11_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81846144)))];
            tensor<fp16, []> var_3385_to_fp16 = const()[name = tensor<string, []>("op_3385_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_159_cast_fp16 = layer_norm(axes = input_159_axes_0, epsilon = var_3385_to_fp16, gamma = dec_layers_11_norm_self_weight_to_fp16, x = input_157_cast_fp16)[name = tensor<string, []>("input_159_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81847744))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(83617280))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_44_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized, x = input_159_cast_fp16)[name = tensor<string, []>("linear_44_cast_fp16")];
            tensor<int32, [5]> var_3399 = const()[name = tensor<string, []>("op_3399"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_cast_fp16 = reshape(shape = var_3399, x = linear_44_cast_fp16)[name = tensor<string, []>("qkv_cast_fp16")];
            tensor<int32, [5]> q_begin_0 = const()[name = tensor<string, []>("q_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_end_0 = const()[name = tensor<string, []>("q_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_end_mask_0 = const()[name = tensor<string, []>("q_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_squeeze_mask_0 = const()[name = tensor<string, []>("q_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_cast_fp16 = slice_by_index(begin = q_begin_0, end = q_end_0, end_mask = q_end_mask_0, squeeze_mask = q_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor<string, []>("q_cast_fp16")];
            tensor<int32, [5]> new_k_begin_0 = const()[name = tensor<string, []>("new_k_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_end_0 = const()[name = tensor<string, []>("new_k_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_end_mask_0 = const()[name = tensor<string, []>("new_k_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_cast_fp16 = slice_by_index(begin = new_k_begin_0, end = new_k_end_0, end_mask = new_k_end_mask_0, squeeze_mask = new_k_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor<string, []>("new_k_cast_fp16")];
            tensor<int32, [5]> new_v_begin_0 = const()[name = tensor<string, []>("new_v_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_end_0 = const()[name = tensor<string, []>("new_v_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_end_mask_0 = const()[name = tensor<string, []>("new_v_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_cast_fp16 = slice_by_index(begin = new_v_begin_0, end = new_v_end_0, end_mask = new_v_end_mask_0, squeeze_mask = new_v_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor<string, []>("new_v_cast_fp16")];
            tensor<string, []> sa_k_in_11_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_11_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_11_to_fp16 = cast(dtype = sa_k_in_11_to_fp16_dtype_0, x = sa_k_in_11)[name = tensor<string, []>("cast_30")];
            tensor<fp16, [1, 600, 12, 64]> var_3460_cast_fp16 = mul(x = sa_k_in_11_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_3460_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_3461_cast_fp16 = mul(x = new_k_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_3461_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_cast_fp16 = add(x = var_3460_cast_fp16, y = var_3461_cast_fp16)[name = tensor<string, []>("sa_k_out_cast_fp16")];
            tensor<string, []> sa_k_out_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_11_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_11_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_11_to_fp16 = cast(dtype = sa_v_in_11_to_fp16_dtype_0, x = sa_v_in_11)[name = tensor<string, []>("cast_29")];
            tensor<fp16, [1, 600, 12, 64]> var_3467_cast_fp16 = mul(x = sa_v_in_11_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_3467_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_3468_cast_fp16 = mul(x = new_v_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_3468_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_cast_fp16 = add(x = var_3467_cast_fp16, y = var_3468_cast_fp16)[name = tensor<string, []>("sa_v_out_cast_fp16")];
            tensor<string, []> sa_v_out_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_3487 = const()[name = tensor<string, []>("op_3487"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_3489_transpose_x_0 = const()[name = tensor<string, []>("op_3489_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_3489_transpose_y_0 = const()[name = tensor<string, []>("op_3489_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_140_perm_0 = const()[name = tensor<string, []>("transpose_140_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_141_perm_0 = const()[name = tensor<string, []>("transpose_141_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_141 = transpose(perm = transpose_141_perm_0, x = sa_k_out_cast_fp16)[name = tensor<string, []>("transpose_152")];
            tensor<fp16, [1, 12, 1, 64]> transpose_140 = transpose(perm = transpose_140_perm_0, x = q_cast_fp16)[name = tensor<string, []>("transpose_153")];
            tensor<fp16, [1, 12, 1, 600]> var_3489_cast_fp16 = matmul(transpose_x = var_3489_transpose_x_0, transpose_y = var_3489_transpose_y_0, x = transpose_140, y = transpose_141)[name = tensor<string, []>("op_3489_cast_fp16")];
            tensor<fp16, []> var_3490_to_fp16 = const()[name = tensor<string, []>("op_3490_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_45_cast_fp16 = mul(x = var_3489_cast_fp16, y = var_3490_to_fp16)[name = tensor<string, []>("scores_45_cast_fp16")];
            tensor<fp16, []> var_3508_to_fp16 = const()[name = tensor<string, []>("op_3508_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_cast_fp16 = select(a = var_3508_to_fp16, b = scores_45_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_cast_fp16")];
            tensor<int32, []> var_3510 = const()[name = tensor<string, []>("op_3510"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_cast_fp16 = softmax(axis = var_3510, x = scores_cast_fp16)[name = tensor<string, []>("probs_cast_fp16")];
            tensor<bool, []> var_3513_transpose_x_0 = const()[name = tensor<string, []>("op_3513_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_3513_transpose_y_0 = const()[name = tensor<string, []>("op_3513_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_cast_fp16 = transpose(perm = var_3487, x = sa_v_out_cast_fp16)[name = tensor<string, []>("transpose_151")];
            tensor<fp16, [1, 12, 1, 64]> var_3513_cast_fp16 = matmul(transpose_x = var_3513_transpose_x_0, transpose_y = var_3513_transpose_y_0, x = probs_cast_fp16, y = v_t_cast_fp16)[name = tensor<string, []>("op_3513_cast_fp16")];
            tensor<int32, [4]> var_3518 = const()[name = tensor<string, []>("op_3518"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_3523 = const()[name = tensor<string, []>("op_3523"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_3519_cast_fp16 = transpose(perm = var_3518, x = var_3513_cast_fp16)[name = tensor<string, []>("transpose_150")];
            tensor<fp16, [1, 1, 768]> input_161_cast_fp16 = reshape(shape = var_3523, x = var_3519_cast_fp16)[name = tensor<string, []>("input_161_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_11_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_11_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(83621952))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84211840))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_45_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_11_self_attention_o_net_weight_to_fp16_quantized, x = input_161_cast_fp16)[name = tensor<string, []>("linear_45_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_163_cast_fp16 = add(x = input_157_cast_fp16, y = linear_45_cast_fp16)[name = tensor<string, []>("input_163_cast_fp16")];
            tensor<int32, [1]> input_165_axes_0 = const()[name = tensor<string, []>("input_165_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_11_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_11_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84213440)))];
            tensor<fp16, []> var_3531_to_fp16 = const()[name = tensor<string, []>("op_3531_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_165_cast_fp16 = layer_norm(axes = input_165_axes_0, epsilon = var_3531_to_fp16, gamma = dec_layers_11_norm_xattn_query_weight_to_fp16, x = input_163_cast_fp16)[name = tensor<string, []>("input_165_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84215040))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84313408))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_46_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized, x = input_165_cast_fp16)[name = tensor<string, []>("linear_46_cast_fp16")];
            tensor<int32, [4]> var_3544 = const()[name = tensor<string, []>("op_3544"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_cast_fp16 = reshape(shape = var_3544, x = linear_46_cast_fp16)[name = tensor<string, []>("xq_proj_cast_fp16")];
            tensor<int32, [4]> var_3562 = const()[name = tensor<string, []>("op_3562"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_11_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_11_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_3564_transpose_x_0 = const()[name = tensor<string, []>("op_3564_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_3564_transpose_y_0 = const()[name = tensor<string, []>("op_3564_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_11_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_11_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_142_perm_0 = const()[name = tensor<string, []>("transpose_142_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_143_perm_0 = const()[name = tensor<string, []>("transpose_143_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_11_to_fp16 = cast(dtype = xa_k_11_to_fp16_dtype_0, x = xa_k_11)[name = tensor<string, []>("cast_28")];
            tensor<fp16, [1, 1, 128, 256]> transpose_143 = transpose(perm = transpose_143_perm_0, x = xa_k_11_to_fp16)[name = tensor<string, []>("transpose_148")];
            tensor<fp16, [1, 1, 1, 128]> transpose_142 = transpose(perm = transpose_142_perm_0, x = xq_proj_cast_fp16)[name = tensor<string, []>("transpose_149")];
            tensor<fp16, [1, 1, 1, 256]> var_3564_cast_fp16 = matmul(transpose_x = var_3564_transpose_x_0, transpose_y = var_3564_transpose_y_0, x = transpose_142, y = transpose_143)[name = tensor<string, []>("op_3564_cast_fp16")];
            tensor<fp16, []> var_3565_to_fp16 = const()[name = tensor<string, []>("op_3565_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_45_cast_fp16 = mul(x = var_3564_cast_fp16, y = var_3565_to_fp16)[name = tensor<string, []>("xscores_45_cast_fp16")];
            tensor<fp16, []> var_3583_to_fp16 = const()[name = tensor<string, []>("op_3583_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_cast_fp16 = select(a = var_3583_to_fp16, b = xscores_45_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_cast_fp16")];
            tensor<int32, []> var_3585 = const()[name = tensor<string, []>("op_3585"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_cast_fp16 = softmax(axis = var_3585, x = xscores_cast_fp16)[name = tensor<string, []>("xprobs_cast_fp16")];
            tensor<bool, []> var_3588_transpose_x_0 = const()[name = tensor<string, []>("op_3588_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_3588_transpose_y_0 = const()[name = tensor<string, []>("op_3588_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_11_to_fp16 = cast(dtype = xa_v_11_to_fp16_dtype_0, x = xa_v_11)[name = tensor<string, []>("cast_27")];
            tensor<fp16, [1, 1, 256, 128]> xvT_cast_fp16 = transpose(perm = var_3562, x = xa_v_11_to_fp16)[name = tensor<string, []>("transpose_147")];
            tensor<fp16, [1, 1, 1, 128]> var_3588_cast_fp16 = matmul(transpose_x = var_3588_transpose_x_0, transpose_y = var_3588_transpose_y_0, x = xprobs_cast_fp16, y = xvT_cast_fp16)[name = tensor<string, []>("op_3588_cast_fp16")];
            tensor<int32, [4]> var_3593 = const()[name = tensor<string, []>("op_3593"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_3598 = const()[name = tensor<string, []>("op_3598"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_3594_cast_fp16 = transpose(perm = var_3593, x = var_3588_cast_fp16)[name = tensor<string, []>("transpose_146")];
            tensor<fp16, [1, 1, 128]> input_167_cast_fp16 = reshape(shape = var_3598, x = var_3594_cast_fp16)[name = tensor<string, []>("input_167_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84313728))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84412096))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_47_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized, x = input_167_cast_fp16)[name = tensor<string, []>("linear_47_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_169_cast_fp16 = add(x = input_163_cast_fp16, y = linear_47_cast_fp16)[name = tensor<string, []>("input_169_cast_fp16")];
            tensor<int32, [1]> x_89_axes_0 = const()[name = tensor<string, []>("x_89_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_11_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_11_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84413696)))];
            tensor<fp16, []> var_3606_to_fp16 = const()[name = tensor<string, []>("op_3606_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_89_cast_fp16 = layer_norm(axes = x_89_axes_0, epsilon = var_3606_to_fp16, gamma = dec_layers_11_norm_pos_ff_weight_to_fp16, x = input_169_cast_fp16)[name = tensor<string, []>("x_89_cast_fp16")];
            tensor<int32, [3]> var_3622 = const()[name = tensor<string, []>("op_3622"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_45_pad_type_0 = const()[name = tensor<string, []>("y_45_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_45_strides_0 = const()[name = tensor<string, []>("y_45_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_45_pad_0 = const()[name = tensor<string, []>("y_45_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_45_dilations_0 = const()[name = tensor<string, []>("y_45_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_45_groups_0 = const()[name = tensor<string, []>("y_45_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_11_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_11_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84415296))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86774656))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_91_cast_fp16 = transpose(perm = var_3622, x = x_89_cast_fp16)[name = tensor<string, []>("transpose_145")];
            tensor<fp16, [1, 3072, 1]> y_45_cast_fp16 = conv(dilations = y_45_dilations_0, groups = y_45_groups_0, pad = y_45_pad_0, pad_type = y_45_pad_type_0, strides = y_45_strides_0, weight = dec_layers_11_pos_ff_proj_weight_to_fp16_quantized, x = x_91_cast_fp16)[name = tensor<string, []>("y_45_cast_fp16")];
            tensor<string, []> x_93_mode_0 = const()[name = tensor<string, []>("x_93_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_93_cast_fp16 = gelu(mode = x_93_mode_0, x = y_45_cast_fp16)[name = tensor<string, []>("x_93_cast_fp16")];
            tensor<string, []> y_pad_type_0 = const()[name = tensor<string, []>("y_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_strides_0 = const()[name = tensor<string, []>("y_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_pad_0 = const()[name = tensor<string, []>("y_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_dilations_0 = const()[name = tensor<string, []>("y_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_groups_0 = const()[name = tensor<string, []>("y_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86780864))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89140224))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_cast_fp16 = conv(dilations = y_dilations_0, groups = y_groups_0, pad = y_pad_0, pad_type = y_pad_type_0, strides = y_strides_0, weight = dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized, x = x_93_cast_fp16)[name = tensor<string, []>("y_cast_fp16")];
            tensor<int32, [3]> var_3640 = const()[name = tensor<string, []>("op_3640"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_3641_cast_fp16 = transpose(perm = var_3640, x = y_cast_fp16)[name = tensor<string, []>("transpose_144")];
            tensor<fp16, [1, 1, 768]> input_171_cast_fp16 = add(x = input_169_cast_fp16, y = var_3641_cast_fp16)[name = tensor<string, []>("input_171_cast_fp16")];
            tensor<int32, [1]> input_axes_0 = const()[name = tensor<string, []>("input_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_norm_out_weight_to_fp16 = const()[name = tensor<string, []>("dec_norm_out_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89141824)))];
            tensor<fp16, []> var_3645_to_fp16 = const()[name = tensor<string, []>("op_3645_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_cast_fp16 = layer_norm(axes = input_axes_0, epsilon = var_3645_to_fp16, gamma = dec_norm_out_weight_to_fp16, x = input_171_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
            tensor<string, []> input_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("input_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<fp16, [16192, 768]> dec_final_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_final_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [16192, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89143424))), scale = tensor<fp16, [16192]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101595200))), zero_point = tensor<int8, [16192]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101578944)))];
            tensor<fp16, [16192]> dec_final_proj_bias_to_fp16 = const()[name = tensor<string, []>("dec_final_proj_bias_to_fp16"), val = tensor<fp16, [16192]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101627648)))];
            tensor<fp16, [1, 1, 16192]> linear_48_cast_fp16 = linear(bias = dec_final_proj_bias_to_fp16, weight = dec_final_proj_weight_to_fp16_quantized, x = input_cast_fp16)[name = tensor<string, []>("linear_48_cast_fp16")];
            tensor<int32, [4]> var_3658 = const()[name = tensor<string, []>("op_3658"), val = tensor<int32, [4]>([1, 1, 8, 2024])];
            tensor<fp16, [1, 1, 8, 2024]> var_3659_cast_fp16 = reshape(shape = var_3658, x = linear_48_cast_fp16)[name = tensor<string, []>("op_3659_cast_fp16")];
            tensor<string, []> var_3659_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_3659_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_0 = cast(dtype = sa_k_out_1_cast_fp16_to_fp32_dtype_0, x = sa_k_out_1_cast_fp16)[name = tensor<string, []>("cast_1")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_0 = cast(dtype = sa_v_out_1_cast_fp16_to_fp32_dtype_0, x = sa_v_out_1_cast_fp16)[name = tensor<string, []>("cast_2")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_1 = cast(dtype = sa_k_out_3_cast_fp16_to_fp32_dtype_0, x = sa_k_out_3_cast_fp16)[name = tensor<string, []>("cast_3")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_1 = cast(dtype = sa_v_out_3_cast_fp16_to_fp32_dtype_0, x = sa_v_out_3_cast_fp16)[name = tensor<string, []>("cast_4")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_2 = cast(dtype = sa_k_out_5_cast_fp16_to_fp32_dtype_0, x = sa_k_out_5_cast_fp16)[name = tensor<string, []>("cast_5")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_2 = cast(dtype = sa_v_out_5_cast_fp16_to_fp32_dtype_0, x = sa_v_out_5_cast_fp16)[name = tensor<string, []>("cast_6")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_3 = cast(dtype = sa_k_out_7_cast_fp16_to_fp32_dtype_0, x = sa_k_out_7_cast_fp16)[name = tensor<string, []>("cast_7")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_3 = cast(dtype = sa_v_out_7_cast_fp16_to_fp32_dtype_0, x = sa_v_out_7_cast_fp16)[name = tensor<string, []>("cast_8")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_4 = cast(dtype = sa_k_out_9_cast_fp16_to_fp32_dtype_0, x = sa_k_out_9_cast_fp16)[name = tensor<string, []>("cast_9")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_4 = cast(dtype = sa_v_out_9_cast_fp16_to_fp32_dtype_0, x = sa_v_out_9_cast_fp16)[name = tensor<string, []>("cast_10")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_5 = cast(dtype = sa_k_out_11_cast_fp16_to_fp32_dtype_0, x = sa_k_out_11_cast_fp16)[name = tensor<string, []>("cast_11")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_5 = cast(dtype = sa_v_out_11_cast_fp16_to_fp32_dtype_0, x = sa_v_out_11_cast_fp16)[name = tensor<string, []>("cast_12")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_6 = cast(dtype = sa_k_out_13_cast_fp16_to_fp32_dtype_0, x = sa_k_out_13_cast_fp16)[name = tensor<string, []>("cast_13")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_6 = cast(dtype = sa_v_out_13_cast_fp16_to_fp32_dtype_0, x = sa_v_out_13_cast_fp16)[name = tensor<string, []>("cast_14")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_7 = cast(dtype = sa_k_out_15_cast_fp16_to_fp32_dtype_0, x = sa_k_out_15_cast_fp16)[name = tensor<string, []>("cast_15")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_7 = cast(dtype = sa_v_out_15_cast_fp16_to_fp32_dtype_0, x = sa_v_out_15_cast_fp16)[name = tensor<string, []>("cast_16")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_8 = cast(dtype = sa_k_out_17_cast_fp16_to_fp32_dtype_0, x = sa_k_out_17_cast_fp16)[name = tensor<string, []>("cast_17")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_8 = cast(dtype = sa_v_out_17_cast_fp16_to_fp32_dtype_0, x = sa_v_out_17_cast_fp16)[name = tensor<string, []>("cast_18")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_9 = cast(dtype = sa_k_out_19_cast_fp16_to_fp32_dtype_0, x = sa_k_out_19_cast_fp16)[name = tensor<string, []>("cast_19")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_9 = cast(dtype = sa_v_out_19_cast_fp16_to_fp32_dtype_0, x = sa_v_out_19_cast_fp16)[name = tensor<string, []>("cast_20")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_10 = cast(dtype = sa_k_out_21_cast_fp16_to_fp32_dtype_0, x = sa_k_out_21_cast_fp16)[name = tensor<string, []>("cast_21")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_10 = cast(dtype = sa_v_out_21_cast_fp16_to_fp32_dtype_0, x = sa_v_out_21_cast_fp16)[name = tensor<string, []>("cast_22")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_11 = cast(dtype = sa_k_out_cast_fp16_to_fp32_dtype_0, x = sa_k_out_cast_fp16)[name = tensor<string, []>("cast_23")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_11 = cast(dtype = sa_v_out_cast_fp16_to_fp32_dtype_0, x = sa_v_out_cast_fp16)[name = tensor<string, []>("cast_24")];
            tensor<fp32, [1, 1, 768]> h_last = cast(dtype = input_cast_fp16_to_fp32_dtype_0, x = input_cast_fp16)[name = tensor<string, []>("cast_25")];
            tensor<fp32, [1, 1, 8, 2024]> logits = cast(dtype = var_3659_cast_fp16_to_fp32_dtype_0, x = var_3659_cast_fp16)[name = tensor<string, []>("cast_26")];
            tensor<fp32, [1, 256, 768]> encoder_output_tmp = identity(x = encoder_output)[name = tensor<string, []>("encoder_output_tmp")];
        } -> (logits, h_last, sa_k_out_0, sa_k_out_1, sa_k_out_2, sa_k_out_3, sa_k_out_4, sa_k_out_5, sa_k_out_6, sa_k_out_7, sa_k_out_8, sa_k_out_9, sa_k_out_10, sa_k_out_11, sa_v_out_0, sa_v_out_1, sa_v_out_2, sa_v_out_3, sa_v_out_4, sa_v_out_5, sa_v_out_6, sa_v_out_7, sa_v_out_8, sa_v_out_9, sa_v_out_10, sa_v_out_11);
}