File size: 153,849 Bytes
cc7b0cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
program(1.0)
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.7.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
{
    func main<ios17>(tensor<int32, [1, ?]> attention_mask, tensor<int32, [1, ?]> input_ids) [FlexibleShapeInformation = tuple<tuple<tensor<string, []>, dict<tensor<string, []>, tensor<int32, [?]>>>, tuple<tensor<string, []>, dict<tensor<string, []>, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"attention_mask", [1, 16]}, {"input_ids", [1, 16]}}), ("RangeDims", {{"attention_mask", [[1, 1], [1, 64]]}, {"input_ids", [[1, 1], [1, 64]]}})))] {
            tensor<fp32, [384, 256]> encoder_embed_tokens_weight = const()[name = tensor<string, []>("encoder_embed_tokens_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
            tensor<fp32, [256]> encoder_block_0_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_0_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(393344)))];
            tensor<fp32, [384, 256]> encoder_block_0_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_0_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(394432)))];
            tensor<fp32, [384, 256]> encoder_block_0_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_0_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(787712)))];
            tensor<fp32, [384, 256]> encoder_block_0_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_0_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1180992)))];
            tensor<fp32, [32, 6]> encoder_block_0_layer_0_SelfAttention_relative_attention_bias_weight = const()[name = tensor<string, []>("encoder_block_0_layer_0_SelfAttention_relative_attention_bias_weight"), val = tensor<fp32, [32, 6]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1574272)))];
            tensor<fp32, [256, 384]> encoder_block_0_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_0_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1575104)))];
            tensor<fp32, [256]> encoder_block_0_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_0_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1968384)))];
            tensor<fp32, [1024, 256]> encoder_block_0_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_0_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1969472)))];
            tensor<fp32, [1024, 256]> encoder_block_0_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_0_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3018112)))];
            tensor<fp32, [256, 1024]> encoder_block_0_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_0_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4066752)))];
            tensor<fp32, [256]> encoder_block_1_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_1_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5115392)))];
            tensor<fp32, [384, 256]> encoder_block_1_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_1_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5116480)))];
            tensor<fp32, [384, 256]> encoder_block_1_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_1_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5509760)))];
            tensor<fp32, [384, 256]> encoder_block_1_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_1_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5903040)))];
            tensor<fp32, [256, 384]> encoder_block_1_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_1_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6296320)))];
            tensor<fp32, [256]> encoder_block_1_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_1_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6689600)))];
            tensor<fp32, [1024, 256]> encoder_block_1_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_1_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6690688)))];
            tensor<fp32, [1024, 256]> encoder_block_1_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_1_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7739328)))];
            tensor<fp32, [256, 1024]> encoder_block_1_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_1_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8787968)))];
            tensor<fp32, [256]> encoder_block_2_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_2_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9836608)))];
            tensor<fp32, [384, 256]> encoder_block_2_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_2_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9837696)))];
            tensor<fp32, [384, 256]> encoder_block_2_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_2_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10230976)))];
            tensor<fp32, [384, 256]> encoder_block_2_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_2_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10624256)))];
            tensor<fp32, [256, 384]> encoder_block_2_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_2_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11017536)))];
            tensor<fp32, [256]> encoder_block_2_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_2_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11410816)))];
            tensor<fp32, [1024, 256]> encoder_block_2_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_2_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11411904)))];
            tensor<fp32, [1024, 256]> encoder_block_2_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_2_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12460544)))];
            tensor<fp32, [256, 1024]> encoder_block_2_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_2_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13509184)))];
            tensor<fp32, [256]> encoder_block_3_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_3_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14557824)))];
            tensor<fp32, [384, 256]> encoder_block_3_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_3_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14558912)))];
            tensor<fp32, [384, 256]> encoder_block_3_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_3_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14952192)))];
            tensor<fp32, [384, 256]> encoder_block_3_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_3_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15345472)))];
            tensor<fp32, [256, 384]> encoder_block_3_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_3_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15738752)))];
            tensor<fp32, [256]> encoder_block_3_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_3_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16132032)))];
            tensor<fp32, [1024, 256]> encoder_block_3_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_3_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16133120)))];
            tensor<fp32, [1024, 256]> encoder_block_3_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_3_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17181760)))];
            tensor<fp32, [256, 1024]> encoder_block_3_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_3_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18230400)))];
            tensor<fp32, [256]> encoder_block_4_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_4_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(19279040)))];
            tensor<fp32, [384, 256]> encoder_block_4_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_4_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(19280128)))];
            tensor<fp32, [384, 256]> encoder_block_4_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_4_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(19673408)))];
            tensor<fp32, [384, 256]> encoder_block_4_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_4_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20066688)))];
            tensor<fp32, [256, 384]> encoder_block_4_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_4_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20459968)))];
            tensor<fp32, [256]> encoder_block_4_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_4_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20853248)))];
            tensor<fp32, [1024, 256]> encoder_block_4_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_4_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20854336)))];
            tensor<fp32, [1024, 256]> encoder_block_4_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_4_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21902976)))];
            tensor<fp32, [256, 1024]> encoder_block_4_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_4_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22951616)))];
            tensor<fp32, [256]> encoder_block_5_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_5_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24000256)))];
            tensor<fp32, [384, 256]> encoder_block_5_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_5_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24001344)))];
            tensor<fp32, [384, 256]> encoder_block_5_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_5_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24394624)))];
            tensor<fp32, [384, 256]> encoder_block_5_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_5_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24787904)))];
            tensor<fp32, [256, 384]> encoder_block_5_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_5_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25181184)))];
            tensor<fp32, [256]> encoder_block_5_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_5_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25574464)))];
            tensor<fp32, [1024, 256]> encoder_block_5_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_5_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25575552)))];
            tensor<fp32, [1024, 256]> encoder_block_5_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_5_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26624192)))];
            tensor<fp32, [256, 1024]> encoder_block_5_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_5_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27672832)))];
            tensor<fp32, [256]> encoder_block_6_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_6_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28721472)))];
            tensor<fp32, [384, 256]> encoder_block_6_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_6_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28722560)))];
            tensor<fp32, [384, 256]> encoder_block_6_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_6_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29115840)))];
            tensor<fp32, [384, 256]> encoder_block_6_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_6_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29509120)))];
            tensor<fp32, [256, 384]> encoder_block_6_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_6_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29902400)))];
            tensor<fp32, [256]> encoder_block_6_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_6_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30295680)))];
            tensor<fp32, [1024, 256]> encoder_block_6_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_6_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30296768)))];
            tensor<fp32, [1024, 256]> encoder_block_6_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_6_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31345408)))];
            tensor<fp32, [256, 1024]> encoder_block_6_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_6_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32394048)))];
            tensor<fp32, [256]> encoder_block_7_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_7_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33442688)))];
            tensor<fp32, [384, 256]> encoder_block_7_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_7_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33443776)))];
            tensor<fp32, [384, 256]> encoder_block_7_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_7_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33837056)))];
            tensor<fp32, [384, 256]> encoder_block_7_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_7_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34230336)))];
            tensor<fp32, [256, 384]> encoder_block_7_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_7_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34623616)))];
            tensor<fp32, [256]> encoder_block_7_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_7_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35016896)))];
            tensor<fp32, [1024, 256]> encoder_block_7_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_7_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35017984)))];
            tensor<fp32, [1024, 256]> encoder_block_7_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_7_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36066624)))];
            tensor<fp32, [256, 1024]> encoder_block_7_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_7_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37115264)))];
            tensor<fp32, [256]> encoder_block_8_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_8_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38163904)))];
            tensor<fp32, [384, 256]> encoder_block_8_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_8_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38164992)))];
            tensor<fp32, [384, 256]> encoder_block_8_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_8_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38558272)))];
            tensor<fp32, [384, 256]> encoder_block_8_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_8_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38951552)))];
            tensor<fp32, [256, 384]> encoder_block_8_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_8_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39344832)))];
            tensor<fp32, [256]> encoder_block_8_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_8_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39738112)))];
            tensor<fp32, [1024, 256]> encoder_block_8_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_8_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39739200)))];
            tensor<fp32, [1024, 256]> encoder_block_8_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_8_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40787840)))];
            tensor<fp32, [256, 1024]> encoder_block_8_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_8_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41836480)))];
            tensor<fp32, [256]> encoder_block_9_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_9_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42885120)))];
            tensor<fp32, [384, 256]> encoder_block_9_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_9_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42886208)))];
            tensor<fp32, [384, 256]> encoder_block_9_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_9_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43279488)))];
            tensor<fp32, [384, 256]> encoder_block_9_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_9_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43672768)))];
            tensor<fp32, [256, 384]> encoder_block_9_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_9_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44066048)))];
            tensor<fp32, [256]> encoder_block_9_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_9_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44459328)))];
            tensor<fp32, [1024, 256]> encoder_block_9_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_9_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44460416)))];
            tensor<fp32, [1024, 256]> encoder_block_9_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_9_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45509056)))];
            tensor<fp32, [256, 1024]> encoder_block_9_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_9_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46557696)))];
            tensor<fp32, [256]> encoder_block_10_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_10_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47606336)))];
            tensor<fp32, [384, 256]> encoder_block_10_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_10_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47607424)))];
            tensor<fp32, [384, 256]> encoder_block_10_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_10_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48000704)))];
            tensor<fp32, [384, 256]> encoder_block_10_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_10_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48393984)))];
            tensor<fp32, [256, 384]> encoder_block_10_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_10_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48787264)))];
            tensor<fp32, [256]> encoder_block_10_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_10_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49180544)))];
            tensor<fp32, [1024, 256]> encoder_block_10_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_10_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49181632)))];
            tensor<fp32, [1024, 256]> encoder_block_10_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_10_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50230272)))];
            tensor<fp32, [256, 1024]> encoder_block_10_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_10_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51278912)))];
            tensor<fp32, [256]> encoder_block_11_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_11_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52327552)))];
            tensor<fp32, [384, 256]> encoder_block_11_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_11_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52328640)))];
            tensor<fp32, [384, 256]> encoder_block_11_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_11_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52721920)))];
            tensor<fp32, [384, 256]> encoder_block_11_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_11_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53115200)))];
            tensor<fp32, [256, 384]> encoder_block_11_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_11_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53508480)))];
            tensor<fp32, [256]> encoder_block_11_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_11_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53901760)))];
            tensor<fp32, [1024, 256]> encoder_block_11_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_11_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53902848)))];
            tensor<fp32, [1024, 256]> encoder_block_11_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_11_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54951488)))];
            tensor<fp32, [256, 1024]> encoder_block_11_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_11_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56000128)))];
            tensor<fp32, [256]> encoder_final_layer_norm_weight = const()[name = tensor<string, []>("encoder_final_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57048768)))];
            tensor<int32, []> var_7 = const()[name = tensor<string, []>("op_7"), val = tensor<int32, []>(8)];
            tensor<fp32, []> var_13 = const()[name = tensor<string, []>("op_13"), val = tensor<fp32, []>(0x1p+0)];
            tensor<int32, []> var_19 = const()[name = tensor<string, []>("op_19"), val = tensor<int32, []>(0)];
            tensor<int32, []> var_21 = const()[name = tensor<string, []>("op_21"), val = tensor<int32, []>(-1)];
            tensor<int32, []> input_3_batch_dims_0 = const()[name = tensor<string, []>("input_3_batch_dims_0"), val = tensor<int32, []>(0)];
            tensor<bool, []> input_3_validate_indices_0 = const()[name = tensor<string, []>("input_3_validate_indices_0"), val = tensor<bool, []>(false)];
            tensor<int32, []> greater_equal_1_y_0 = const()[name = tensor<string, []>("greater_equal_1_y_0"), val = tensor<int32, []>(0)];
            tensor<bool, [1, ?]> greater_equal_1 = greater_equal(x = input_ids, y = greater_equal_1_y_0)[name = tensor<string, []>("greater_equal_1")];
            tensor<int32, []> slice_by_index_1 = const()[name = tensor<string, []>("slice_by_index_1"), val = tensor<int32, []>(384)];
            tensor<int32, [1, ?]> add_1 = add(x = input_ids, y = slice_by_index_1)[name = tensor<string, []>("add_1")];
            tensor<int32, [1, ?]> select_1 = select(a = input_ids, b = add_1, cond = greater_equal_1)[name = tensor<string, []>("select_1")];
            tensor<int32, []> input_3_axis_1 = const()[name = tensor<string, []>("input_3_axis_1"), val = tensor<int32, []>(0)];
            tensor<fp32, [1, ?, 256]> input_3 = gather(axis = input_3_axis_1, batch_dims = input_3_batch_dims_0, indices = select_1, validate_indices = input_3_validate_indices_0, x = encoder_embed_tokens_weight)[name = tensor<string, []>("input_3")];
            tensor<int32, [1]> var_55_axes_0 = const()[name = tensor<string, []>("op_55_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [1, 1, ?]> var_55 = expand_dims(axes = var_55_axes_0, x = attention_mask)[name = tensor<string, []>("op_55")];
            tensor<int32, [1]> var_56_axes_0 = const()[name = tensor<string, []>("op_56_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<int32, [1, 1, 1, ?]> var_56 = expand_dims(axes = var_56_axes_0, x = var_55)[name = tensor<string, []>("op_56")];
            tensor<string, []> var_58_dtype_0 = const()[name = tensor<string, []>("op_58_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<fp32, [1, 1, 1, ?]> var_58 = cast(dtype = var_58_dtype_0, x = var_56)[name = tensor<string, []>("cast_58")];
            tensor<fp32, [1, 1, 1, ?]> var_59 = sub(x = var_13, y = var_58)[name = tensor<string, []>("op_59")];
            tensor<fp32, []> var_60 = const()[name = tensor<string, []>("op_60"), val = tensor<fp32, []>(-0x1.fffffep+127)];
            tensor<fp32, [1, 1, 1, ?]> mask = mul(x = var_59, y = var_60)[name = tensor<string, []>("mask")];
            tensor<fp32, []> var_17_promoted = const()[name = tensor<string, []>("op_17_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_71 = pow(x = input_3, y = var_17_promoted)[name = tensor<string, []>("op_71")];
            tensor<int32, [1]> variance_1_axes_0 = const()[name = tensor<string, []>("variance_1_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_1_keep_dims_0 = const()[name = tensor<string, []>("variance_1_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_1 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = var_71)[name = tensor<string, []>("variance_1")];
            tensor<fp32, []> var_74 = const()[name = tensor<string, []>("op_74"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_75 = add(x = variance_1, y = var_74)[name = tensor<string, []>("op_75")];
            tensor<fp32, []> var_76_epsilon_0 = const()[name = tensor<string, []>("op_76_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_76 = rsqrt(epsilon = var_76_epsilon_0, x = var_75)[name = tensor<string, []>("op_76")];
            tensor<fp32, [1, ?, 256]> hidden_states_5 = mul(x = input_3, y = var_76)[name = tensor<string, []>("hidden_states_5")];
            tensor<fp32, [1, ?, 256]> hidden_states_7 = mul(x = encoder_block_0_layer_0_layer_norm_weight, y = hidden_states_5)[name = tensor<string, []>("hidden_states_7")];
            tensor<int32, [3]> var_88_shape = shape(x = hidden_states_7)[name = tensor<string, []>("op_88_shape")];
            tensor<int32, []> gather_2_batch_dims_0 = const()[name = tensor<string, []>("gather_2_batch_dims_0"), val = tensor<int32, []>(0)];
            tensor<bool, []> gather_2_validate_indices_0 = const()[name = tensor<string, []>("gather_2_validate_indices_0"), val = tensor<bool, []>(false)];
            tensor<int32, []> select_2 = const()[name = tensor<string, []>("select_2"), val = tensor<int32, []>(1)];
            tensor<int32, []> gather_2_axis_1 = const()[name = tensor<string, []>("gather_2_axis_1"), val = tensor<int32, []>(0)];
            tensor<int32, []> gather_2 = gather(axis = gather_2_axis_1, batch_dims = gather_2_batch_dims_0, indices = select_2, validate_indices = gather_2_validate_indices_0, x = var_88_shape)[name = tensor<string, []>("gather_2")];
            tensor<fp32, [384]> linear_0_bias_0 = const()[name = tensor<string, []>("linear_0_bias_0"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57049856)))];
            tensor<fp32, [1, ?, 384]> states_1 = linear(bias = linear_0_bias_0, weight = encoder_block_0_layer_0_SelfAttention_q_weight, x = hidden_states_7)[name = tensor<string, []>("linear_0")];
            tensor<int32, [4]> var_91 = const()[name = tensor<string, []>("op_91"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_92 = reshape(shape = var_91, x = states_1)[name = tensor<string, []>("op_92")];
            tensor<fp32, [1, ?, 384]> states_3 = linear(bias = linear_0_bias_0, weight = encoder_block_0_layer_0_SelfAttention_k_weight, x = hidden_states_7)[name = tensor<string, []>("linear_1")];
            tensor<int32, [4]> var_96 = const()[name = tensor<string, []>("op_96"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_97 = reshape(shape = var_96, x = states_3)[name = tensor<string, []>("op_97")];
            tensor<fp32, [1, ?, 384]> states_5 = linear(bias = linear_0_bias_0, weight = encoder_block_0_layer_0_SelfAttention_v_weight, x = hidden_states_7)[name = tensor<string, []>("linear_2")];
            tensor<int32, [4]> var_101 = const()[name = tensor<string, []>("op_101"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_102 = reshape(shape = var_101, x = states_5)[name = tensor<string, []>("op_102")];
            tensor<int32, [4]> value_states_1_perm_0 = const()[name = tensor<string, []>("value_states_1_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<bool, []> scores_1_transpose_x_0 = const()[name = tensor<string, []>("scores_1_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> scores_1_transpose_y_0 = const()[name = tensor<string, []>("scores_1_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_36_perm_0 = const()[name = tensor<string, []>("transpose_36_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_37_perm_0 = const()[name = tensor<string, []>("transpose_37_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp32, [1, 6, 64, ?]> transpose_37 = transpose(perm = transpose_37_perm_0, x = var_97)[name = tensor<string, []>("transpose_106")];
            tensor<fp32, [1, 6, ?, 64]> transpose_36 = transpose(perm = transpose_36_perm_0, x = var_92)[name = tensor<string, []>("transpose_107")];
            tensor<fp32, [1, 6, ?, ?]> scores_1 = matmul(transpose_x = scores_1_transpose_x_0, transpose_y = scores_1_transpose_y_0, x = transpose_36, y = transpose_37)[name = tensor<string, []>("scores_1")];
            tensor<int32, []> const_0 = const()[name = tensor<string, []>("const_0"), val = tensor<int32, []>(0)];
            tensor<int32, []> const_1 = const()[name = tensor<string, []>("const_1"), val = tensor<int32, []>(1)];
            tensor<int32, [?]> var_106 = range_1d(end = gather_2, start = const_0, step = const_1)[name = tensor<string, []>("op_106")];
            tensor<int32, [1]> context_position_axes_0 = const()[name = tensor<string, []>("context_position_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [?, 1]> context_position = expand_dims(axes = context_position_axes_0, x = var_106)[name = tensor<string, []>("context_position")];
            tensor<int32, [1]> var_110_axes_0 = const()[name = tensor<string, []>("op_110_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1, ?]> var_110 = expand_dims(axes = var_110_axes_0, x = var_106)[name = tensor<string, []>("op_110")];
            tensor<int32, [?, ?]> relative_position_1 = sub(x = var_110, y = context_position)[name = tensor<string, []>("relative_position_1")];
            tensor<bool, [?, ?]> var_113 = greater(x = relative_position_1, y = var_19)[name = tensor<string, []>("op_113")];
            tensor<string, []> var_114_dtype_0 = const()[name = tensor<string, []>("op_114_dtype_0"), val = tensor<string, []>("int32")];
            tensor<int32, []> var_115 = const()[name = tensor<string, []>("op_115"), val = tensor<int32, []>(16)];
            tensor<int32, [?, ?]> var_114 = cast(dtype = var_114_dtype_0, x = var_113)[name = tensor<string, []>("cast_57")];
            tensor<int32, [?, ?]> var_116 = mul(x = var_114, y = var_115)[name = tensor<string, []>("op_116")];
            tensor<int32, [?, ?]> relative_position = abs(x = relative_position_1)[name = tensor<string, []>("relative_position")];
            tensor<bool, [?, ?]> is_small = less(x = relative_position, y = var_7)[name = tensor<string, []>("is_small")];
            tensor<string, []> var_121_dtype_0 = const()[name = tensor<string, []>("op_121_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<fp32, []> _inversed_123_y_0 = const()[name = tensor<string, []>("_inversed_123_y_0"), val = tensor<fp32, []>(0x1p-3)];
            tensor<fp32, [?, ?]> var_121 = cast(dtype = var_121_dtype_0, x = relative_position)[name = tensor<string, []>("cast_56")];
            tensor<fp32, [?, ?]> _inversed_123 = mul(x = var_121, y = _inversed_123_y_0)[name = tensor<string, []>("_inversed_123")];
            tensor<fp32, []> var_124_epsilon_0 = const()[name = tensor<string, []>("op_124_epsilon_0"), val = tensor<fp32, []>(0x1p-149)];
            tensor<fp32, [?, ?]> var_124 = log(epsilon = var_124_epsilon_0, x = _inversed_123)[name = tensor<string, []>("op_124")];
            tensor<fp32, []> _inversed_126_y_0 = const()[name = tensor<string, []>("_inversed_126_y_0"), val = tensor<fp32, []>(0x1.715476p-2)];
            tensor<fp32, [?, ?]> _inversed_126 = mul(x = var_124, y = _inversed_126_y_0)[name = tensor<string, []>("_inversed_126")];
            tensor<fp32, []> var_127_promoted = const()[name = tensor<string, []>("op_127_promoted"), val = tensor<fp32, []>(0x1p+3)];
            tensor<fp32, [?, ?]> var_128 = mul(x = _inversed_126, y = var_127_promoted)[name = tensor<string, []>("op_128")];
            tensor<string, []> var_129_dtype_0 = const()[name = tensor<string, []>("op_129_dtype_0"), val = tensor<string, []>("int32")];
            tensor<int32, []> var_130 = const()[name = tensor<string, []>("op_130"), val = tensor<int32, []>(8)];
            tensor<int32, [?, ?]> var_129 = cast(dtype = var_129_dtype_0, x = var_128)[name = tensor<string, []>("cast_55")];
            tensor<int32, [?, ?]> relative_position_if_large_1 = add(x = var_129, y = var_130)[name = tensor<string, []>("relative_position_if_large_1")];
            tensor<int32, []> var_132_value_0 = const()[name = tensor<string, []>("op_132_value_0"), val = tensor<int32, []>(15)];
            tensor<int32, [?, ?]> var_132 = fill_like(ref_tensor = relative_position_if_large_1, value = var_132_value_0)[name = tensor<string, []>("op_132")];
            tensor<int32, [?, ?]> relative_position_if_large = minimum(x = relative_position_if_large_1, y = var_132)[name = tensor<string, []>("relative_position_if_large")];
            tensor<int32, [?, ?]> var_134 = select(a = relative_position, b = relative_position_if_large, cond = is_small)[name = tensor<string, []>("op_134")];
            tensor<int32, [?, ?]> input_5 = add(x = var_116, y = var_134)[name = tensor<string, []>("input_5")];
            tensor<int32, []> values_batch_dims_0 = const()[name = tensor<string, []>("values_batch_dims_0"), val = tensor<int32, []>(0)];
            tensor<bool, []> values_validate_indices_0 = const()[name = tensor<string, []>("values_validate_indices_0"), val = tensor<bool, []>(false)];
            tensor<int32, []> greater_equal_3_y_0 = const()[name = tensor<string, []>("greater_equal_3_y_0"), val = tensor<int32, []>(0)];
            tensor<bool, [?, ?]> greater_equal_3 = greater_equal(x = input_5, y = greater_equal_3_y_0)[name = tensor<string, []>("greater_equal_3")];
            tensor<int32, []> slice_by_index_3 = const()[name = tensor<string, []>("slice_by_index_3"), val = tensor<int32, []>(32)];
            tensor<int32, [?, ?]> add_3 = add(x = input_5, y = slice_by_index_3)[name = tensor<string, []>("add_3")];
            tensor<int32, [?, ?]> select_3 = select(a = input_5, b = add_3, cond = greater_equal_3)[name = tensor<string, []>("select_3")];
            tensor<int32, []> values_axis_1 = const()[name = tensor<string, []>("values_axis_1"), val = tensor<int32, []>(0)];
            tensor<fp32, [?, ?, 6]> values = gather(axis = values_axis_1, batch_dims = values_batch_dims_0, indices = select_3, validate_indices = values_validate_indices_0, x = encoder_block_0_layer_0_SelfAttention_relative_attention_bias_weight)[name = tensor<string, []>("values")];
            tensor<int32, [3]> var_138 = const()[name = tensor<string, []>("op_138"), val = tensor<int32, [3]>([2, 0, 1])];
            tensor<int32, [1]> position_bias_1_axes_0 = const()[name = tensor<string, []>("position_bias_1_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [6, ?, ?]> var_139 = transpose(perm = var_138, x = values)[name = tensor<string, []>("transpose_105")];
            tensor<fp32, [1, 6, ?, ?]> position_bias_1 = expand_dims(axes = position_bias_1_axes_0, x = var_139)[name = tensor<string, []>("position_bias_1")];
            tensor<fp32, [1, 6, ?, ?]> position_bias = add(x = position_bias_1, y = mask)[name = tensor<string, []>("position_bias")];
            tensor<fp32, [1, 6, ?, ?]> scores_3 = add(x = scores_1, y = position_bias)[name = tensor<string, []>("scores_3")];
            tensor<fp32, [1, 6, ?, ?]> var_144 = softmax(axis = var_21, x = scores_3)[name = tensor<string, []>("op_144")];
            tensor<bool, []> states_7_transpose_x_0 = const()[name = tensor<string, []>("states_7_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> states_7_transpose_y_0 = const()[name = tensor<string, []>("states_7_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 6, ?, 64]> value_states_1 = transpose(perm = value_states_1_perm_0, x = var_102)[name = tensor<string, []>("transpose_108")];
            tensor<fp32, [1, 6, ?, 64]> states_7 = matmul(transpose_x = states_7_transpose_x_0, transpose_y = states_7_transpose_y_0, x = var_144, y = value_states_1)[name = tensor<string, []>("states_7")];
            tensor<int32, [4]> var_148_perm_0 = const()[name = tensor<string, []>("op_148_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_150 = const()[name = tensor<string, []>("op_150"), val = tensor<int32, [3]>([1, -1, 384])];
            tensor<fp32, [1, ?, 6, 64]> var_148 = transpose(perm = var_148_perm_0, x = states_7)[name = tensor<string, []>("transpose_104")];
            tensor<fp32, [1, ?, 384]> input_11 = reshape(shape = var_150, x = var_148)[name = tensor<string, []>("input_11")];
            tensor<fp32, [256]> linear_3_bias_0 = const()[name = tensor<string, []>("linear_3_bias_0"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57051456)))];
            tensor<fp32, [1, ?, 256]> input_13 = linear(bias = linear_3_bias_0, weight = encoder_block_0_layer_0_SelfAttention_o_weight, x = input_11)[name = tensor<string, []>("linear_3")];
            tensor<fp32, [1, ?, 256]> hidden_states_9 = add(x = input_3, y = input_13)[name = tensor<string, []>("hidden_states_9")];
            tensor<fp32, []> var_17_promoted_1 = const()[name = tensor<string, []>("op_17_promoted_1"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_166 = pow(x = hidden_states_9, y = var_17_promoted_1)[name = tensor<string, []>("op_166")];
            tensor<int32, [1]> variance_3_axes_0 = const()[name = tensor<string, []>("variance_3_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_3_keep_dims_0 = const()[name = tensor<string, []>("variance_3_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_3 = reduce_mean(axes = variance_3_axes_0, keep_dims = variance_3_keep_dims_0, x = var_166)[name = tensor<string, []>("variance_3")];
            tensor<fp32, []> var_169 = const()[name = tensor<string, []>("op_169"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_170 = add(x = variance_3, y = var_169)[name = tensor<string, []>("op_170")];
            tensor<fp32, []> var_171_epsilon_0 = const()[name = tensor<string, []>("op_171_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_171 = rsqrt(epsilon = var_171_epsilon_0, x = var_170)[name = tensor<string, []>("op_171")];
            tensor<fp32, [1, ?, 256]> hidden_states_13 = mul(x = hidden_states_9, y = var_171)[name = tensor<string, []>("hidden_states_13")];
            tensor<fp32, [1, ?, 256]> input_15 = mul(x = encoder_block_0_layer_1_layer_norm_weight, y = hidden_states_13)[name = tensor<string, []>("input_15")];
            tensor<fp32, [1024]> linear_4_bias_0 = const()[name = tensor<string, []>("linear_4_bias_0"), val = tensor<fp32, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57052544)))];
            tensor<fp32, [1, ?, 1024]> input_17 = linear(bias = linear_4_bias_0, weight = encoder_block_0_layer_1_DenseReluDense_wi_0_weight, x = input_15)[name = tensor<string, []>("linear_4")];
            tensor<string, []> hidden_gelu_1_mode_0 = const()[name = tensor<string, []>("hidden_gelu_1_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp32, [1, ?, 1024]> hidden_gelu_1 = gelu(mode = hidden_gelu_1_mode_0, x = input_17)[name = tensor<string, []>("hidden_gelu_1")];
            tensor<fp32, [1, ?, 1024]> hidden_linear_1 = linear(bias = linear_4_bias_0, weight = encoder_block_0_layer_1_DenseReluDense_wi_1_weight, x = input_15)[name = tensor<string, []>("linear_5")];
            tensor<fp32, [1, ?, 1024]> input_19 = mul(x = hidden_gelu_1, y = hidden_linear_1)[name = tensor<string, []>("input_19")];
            tensor<fp32, [1, ?, 256]> input_23 = linear(bias = linear_3_bias_0, weight = encoder_block_0_layer_1_DenseReluDense_wo_weight, x = input_19)[name = tensor<string, []>("linear_6")];
            tensor<fp32, [1, ?, 256]> hidden_states_15 = add(x = hidden_states_9, y = input_23)[name = tensor<string, []>("hidden_states_15")];
            tensor<fp32, []> var_17_promoted_2 = const()[name = tensor<string, []>("op_17_promoted_2"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_213 = pow(x = hidden_states_15, y = var_17_promoted_2)[name = tensor<string, []>("op_213")];
            tensor<int32, [1]> variance_5_axes_0 = const()[name = tensor<string, []>("variance_5_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_5_keep_dims_0 = const()[name = tensor<string, []>("variance_5_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_5 = reduce_mean(axes = variance_5_axes_0, keep_dims = variance_5_keep_dims_0, x = var_213)[name = tensor<string, []>("variance_5")];
            tensor<fp32, []> var_216 = const()[name = tensor<string, []>("op_216"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_217 = add(x = variance_5, y = var_216)[name = tensor<string, []>("op_217")];
            tensor<fp32, []> var_218_epsilon_0 = const()[name = tensor<string, []>("op_218_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_218 = rsqrt(epsilon = var_218_epsilon_0, x = var_217)[name = tensor<string, []>("op_218")];
            tensor<fp32, [1, ?, 256]> hidden_states_19 = mul(x = hidden_states_15, y = var_218)[name = tensor<string, []>("hidden_states_19")];
            tensor<fp32, [1, ?, 256]> hidden_states_21 = mul(x = encoder_block_1_layer_0_layer_norm_weight, y = hidden_states_19)[name = tensor<string, []>("hidden_states_21")];
            tensor<fp32, [1, ?, 384]> states_9 = linear(bias = linear_0_bias_0, weight = encoder_block_1_layer_0_SelfAttention_q_weight, x = hidden_states_21)[name = tensor<string, []>("linear_7")];
            tensor<int32, [4]> var_231 = const()[name = tensor<string, []>("op_231"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_232 = reshape(shape = var_231, x = states_9)[name = tensor<string, []>("op_232")];
            tensor<fp32, [1, ?, 384]> states_11 = linear(bias = linear_0_bias_0, weight = encoder_block_1_layer_0_SelfAttention_k_weight, x = hidden_states_21)[name = tensor<string, []>("linear_8")];
            tensor<int32, [4]> var_236 = const()[name = tensor<string, []>("op_236"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_237 = reshape(shape = var_236, x = states_11)[name = tensor<string, []>("op_237")];
            tensor<fp32, [1, ?, 384]> states_13 = linear(bias = linear_0_bias_0, weight = encoder_block_1_layer_0_SelfAttention_v_weight, x = hidden_states_21)[name = tensor<string, []>("linear_9")];
            tensor<int32, [4]> var_241 = const()[name = tensor<string, []>("op_241"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_242 = reshape(shape = var_241, x = states_13)[name = tensor<string, []>("op_242")];
            tensor<int32, [4]> value_states_3_perm_0 = const()[name = tensor<string, []>("value_states_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<bool, []> scores_5_transpose_x_0 = const()[name = tensor<string, []>("scores_5_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> scores_5_transpose_y_0 = const()[name = tensor<string, []>("scores_5_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_38_perm_0 = const()[name = tensor<string, []>("transpose_38_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_39_perm_0 = const()[name = tensor<string, []>("transpose_39_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp32, [1, 6, 64, ?]> transpose_39 = transpose(perm = transpose_39_perm_0, x = var_237)[name = tensor<string, []>("transpose_101")];
            tensor<fp32, [1, 6, ?, 64]> transpose_38 = transpose(perm = transpose_38_perm_0, x = var_232)[name = tensor<string, []>("transpose_102")];
            tensor<fp32, [1, 6, ?, ?]> scores_5 = matmul(transpose_x = scores_5_transpose_x_0, transpose_y = scores_5_transpose_y_0, x = transpose_38, y = transpose_39)[name = tensor<string, []>("scores_5")];
            tensor<fp32, [1, 6, ?, ?]> scores_7 = add(x = scores_5, y = position_bias)[name = tensor<string, []>("scores_7")];
            tensor<fp32, [1, 6, ?, ?]> var_248 = softmax(axis = var_21, x = scores_7)[name = tensor<string, []>("op_248")];
            tensor<bool, []> states_15_transpose_x_0 = const()[name = tensor<string, []>("states_15_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> states_15_transpose_y_0 = const()[name = tensor<string, []>("states_15_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 6, ?, 64]> value_states_3 = transpose(perm = value_states_3_perm_0, x = var_242)[name = tensor<string, []>("transpose_103")];
            tensor<fp32, [1, 6, ?, 64]> states_15 = matmul(transpose_x = states_15_transpose_x_0, transpose_y = states_15_transpose_y_0, x = var_248, y = value_states_3)[name = tensor<string, []>("states_15")];
            tensor<int32, [4]> var_252_perm_0 = const()[name = tensor<string, []>("op_252_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_254 = const()[name = tensor<string, []>("op_254"), val = tensor<int32, [3]>([1, -1, 384])];
            tensor<fp32, [1, ?, 6, 64]> var_252 = transpose(perm = var_252_perm_0, x = states_15)[name = tensor<string, []>("transpose_100")];
            tensor<fp32, [1, ?, 384]> input_29 = reshape(shape = var_254, x = var_252)[name = tensor<string, []>("input_29")];
            tensor<fp32, [1, ?, 256]> input_31 = linear(bias = linear_3_bias_0, weight = encoder_block_1_layer_0_SelfAttention_o_weight, x = input_29)[name = tensor<string, []>("linear_10")];
            tensor<fp32, [1, ?, 256]> hidden_states_23 = add(x = hidden_states_15, y = input_31)[name = tensor<string, []>("hidden_states_23")];
            tensor<fp32, []> var_17_promoted_3 = const()[name = tensor<string, []>("op_17_promoted_3"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_264 = pow(x = hidden_states_23, y = var_17_promoted_3)[name = tensor<string, []>("op_264")];
            tensor<int32, [1]> variance_7_axes_0 = const()[name = tensor<string, []>("variance_7_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_7_keep_dims_0 = const()[name = tensor<string, []>("variance_7_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_7 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = var_264)[name = tensor<string, []>("variance_7")];
            tensor<fp32, []> var_267 = const()[name = tensor<string, []>("op_267"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_268 = add(x = variance_7, y = var_267)[name = tensor<string, []>("op_268")];
            tensor<fp32, []> var_269_epsilon_0 = const()[name = tensor<string, []>("op_269_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_269 = rsqrt(epsilon = var_269_epsilon_0, x = var_268)[name = tensor<string, []>("op_269")];
            tensor<fp32, [1, ?, 256]> hidden_states_27 = mul(x = hidden_states_23, y = var_269)[name = tensor<string, []>("hidden_states_27")];
            tensor<fp32, [1, ?, 256]> input_33 = mul(x = encoder_block_1_layer_1_layer_norm_weight, y = hidden_states_27)[name = tensor<string, []>("input_33")];
            tensor<fp32, [1, ?, 1024]> input_35 = linear(bias = linear_4_bias_0, weight = encoder_block_1_layer_1_DenseReluDense_wi_0_weight, x = input_33)[name = tensor<string, []>("linear_11")];
            tensor<string, []> hidden_gelu_3_mode_0 = const()[name = tensor<string, []>("hidden_gelu_3_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp32, [1, ?, 1024]> hidden_gelu_3 = gelu(mode = hidden_gelu_3_mode_0, x = input_35)[name = tensor<string, []>("hidden_gelu_3")];
            tensor<fp32, [1, ?, 1024]> hidden_linear_3 = linear(bias = linear_4_bias_0, weight = encoder_block_1_layer_1_DenseReluDense_wi_1_weight, x = input_33)[name = tensor<string, []>("linear_12")];
            tensor<fp32, [1, ?, 1024]> input_37 = mul(x = hidden_gelu_3, y = hidden_linear_3)[name = tensor<string, []>("input_37")];
            tensor<fp32, [1, ?, 256]> input_41 = linear(bias = linear_3_bias_0, weight = encoder_block_1_layer_1_DenseReluDense_wo_weight, x = input_37)[name = tensor<string, []>("linear_13")];
            tensor<fp32, [1, ?, 256]> hidden_states_29 = add(x = hidden_states_23, y = input_41)[name = tensor<string, []>("hidden_states_29")];
            tensor<fp32, []> var_17_promoted_4 = const()[name = tensor<string, []>("op_17_promoted_4"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_308 = pow(x = hidden_states_29, y = var_17_promoted_4)[name = tensor<string, []>("op_308")];
            tensor<int32, [1]> variance_9_axes_0 = const()[name = tensor<string, []>("variance_9_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_9_keep_dims_0 = const()[name = tensor<string, []>("variance_9_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_9 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = var_308)[name = tensor<string, []>("variance_9")];
            tensor<fp32, []> var_311 = const()[name = tensor<string, []>("op_311"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_312 = add(x = variance_9, y = var_311)[name = tensor<string, []>("op_312")];
            tensor<fp32, []> var_313_epsilon_0 = const()[name = tensor<string, []>("op_313_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_313 = rsqrt(epsilon = var_313_epsilon_0, x = var_312)[name = tensor<string, []>("op_313")];
            tensor<fp32, [1, ?, 256]> hidden_states_33 = mul(x = hidden_states_29, y = var_313)[name = tensor<string, []>("hidden_states_33")];
            tensor<fp32, [1, ?, 256]> hidden_states_35 = mul(x = encoder_block_2_layer_0_layer_norm_weight, y = hidden_states_33)[name = tensor<string, []>("hidden_states_35")];
            tensor<fp32, [1, ?, 384]> states_17 = linear(bias = linear_0_bias_0, weight = encoder_block_2_layer_0_SelfAttention_q_weight, x = hidden_states_35)[name = tensor<string, []>("linear_14")];
            tensor<int32, [4]> var_326 = const()[name = tensor<string, []>("op_326"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_327 = reshape(shape = var_326, x = states_17)[name = tensor<string, []>("op_327")];
            tensor<fp32, [1, ?, 384]> states_19 = linear(bias = linear_0_bias_0, weight = encoder_block_2_layer_0_SelfAttention_k_weight, x = hidden_states_35)[name = tensor<string, []>("linear_15")];
            tensor<int32, [4]> var_331 = const()[name = tensor<string, []>("op_331"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_332 = reshape(shape = var_331, x = states_19)[name = tensor<string, []>("op_332")];
            tensor<fp32, [1, ?, 384]> states_21 = linear(bias = linear_0_bias_0, weight = encoder_block_2_layer_0_SelfAttention_v_weight, x = hidden_states_35)[name = tensor<string, []>("linear_16")];
            tensor<int32, [4]> var_336 = const()[name = tensor<string, []>("op_336"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_337 = reshape(shape = var_336, x = states_21)[name = tensor<string, []>("op_337")];
            tensor<int32, [4]> value_states_5_perm_0 = const()[name = tensor<string, []>("value_states_5_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<bool, []> scores_9_transpose_x_0 = const()[name = tensor<string, []>("scores_9_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> scores_9_transpose_y_0 = const()[name = tensor<string, []>("scores_9_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_40_perm_0 = const()[name = tensor<string, []>("transpose_40_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_41_perm_0 = const()[name = tensor<string, []>("transpose_41_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp32, [1, 6, 64, ?]> transpose_41 = transpose(perm = transpose_41_perm_0, x = var_332)[name = tensor<string, []>("transpose_97")];
            tensor<fp32, [1, 6, ?, 64]> transpose_40 = transpose(perm = transpose_40_perm_0, x = var_327)[name = tensor<string, []>("transpose_98")];
            tensor<fp32, [1, 6, ?, ?]> scores_9 = matmul(transpose_x = scores_9_transpose_x_0, transpose_y = scores_9_transpose_y_0, x = transpose_40, y = transpose_41)[name = tensor<string, []>("scores_9")];
            tensor<fp32, [1, 6, ?, ?]> scores_11 = add(x = scores_9, y = position_bias)[name = tensor<string, []>("scores_11")];
            tensor<fp32, [1, 6, ?, ?]> var_343 = softmax(axis = var_21, x = scores_11)[name = tensor<string, []>("op_343")];
            tensor<bool, []> states_23_transpose_x_0 = const()[name = tensor<string, []>("states_23_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> states_23_transpose_y_0 = const()[name = tensor<string, []>("states_23_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 6, ?, 64]> value_states_5 = transpose(perm = value_states_5_perm_0, x = var_337)[name = tensor<string, []>("transpose_99")];
            tensor<fp32, [1, 6, ?, 64]> states_23 = matmul(transpose_x = states_23_transpose_x_0, transpose_y = states_23_transpose_y_0, x = var_343, y = value_states_5)[name = tensor<string, []>("states_23")];
            tensor<int32, [4]> var_347_perm_0 = const()[name = tensor<string, []>("op_347_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_349 = const()[name = tensor<string, []>("op_349"), val = tensor<int32, [3]>([1, -1, 384])];
            tensor<fp32, [1, ?, 6, 64]> var_347 = transpose(perm = var_347_perm_0, x = states_23)[name = tensor<string, []>("transpose_96")];
            tensor<fp32, [1, ?, 384]> input_47 = reshape(shape = var_349, x = var_347)[name = tensor<string, []>("input_47")];
            tensor<fp32, [1, ?, 256]> input_49 = linear(bias = linear_3_bias_0, weight = encoder_block_2_layer_0_SelfAttention_o_weight, x = input_47)[name = tensor<string, []>("linear_17")];
            tensor<fp32, [1, ?, 256]> hidden_states_37 = add(x = hidden_states_29, y = input_49)[name = tensor<string, []>("hidden_states_37")];
            tensor<fp32, []> var_17_promoted_5 = const()[name = tensor<string, []>("op_17_promoted_5"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_359 = pow(x = hidden_states_37, y = var_17_promoted_5)[name = tensor<string, []>("op_359")];
            tensor<int32, [1]> variance_11_axes_0 = const()[name = tensor<string, []>("variance_11_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_11_keep_dims_0 = const()[name = tensor<string, []>("variance_11_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_11 = reduce_mean(axes = variance_11_axes_0, keep_dims = variance_11_keep_dims_0, x = var_359)[name = tensor<string, []>("variance_11")];
            tensor<fp32, []> var_362 = const()[name = tensor<string, []>("op_362"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_363 = add(x = variance_11, y = var_362)[name = tensor<string, []>("op_363")];
            tensor<fp32, []> var_364_epsilon_0 = const()[name = tensor<string, []>("op_364_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_364 = rsqrt(epsilon = var_364_epsilon_0, x = var_363)[name = tensor<string, []>("op_364")];
            tensor<fp32, [1, ?, 256]> hidden_states_41 = mul(x = hidden_states_37, y = var_364)[name = tensor<string, []>("hidden_states_41")];
            tensor<fp32, [1, ?, 256]> input_51 = mul(x = encoder_block_2_layer_1_layer_norm_weight, y = hidden_states_41)[name = tensor<string, []>("input_51")];
            tensor<fp32, [1, ?, 1024]> input_53 = linear(bias = linear_4_bias_0, weight = encoder_block_2_layer_1_DenseReluDense_wi_0_weight, x = input_51)[name = tensor<string, []>("linear_18")];
            tensor<string, []> hidden_gelu_5_mode_0 = const()[name = tensor<string, []>("hidden_gelu_5_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp32, [1, ?, 1024]> hidden_gelu_5 = gelu(mode = hidden_gelu_5_mode_0, x = input_53)[name = tensor<string, []>("hidden_gelu_5")];
            tensor<fp32, [1, ?, 1024]> hidden_linear_5 = linear(bias = linear_4_bias_0, weight = encoder_block_2_layer_1_DenseReluDense_wi_1_weight, x = input_51)[name = tensor<string, []>("linear_19")];
            tensor<fp32, [1, ?, 1024]> input_55 = mul(x = hidden_gelu_5, y = hidden_linear_5)[name = tensor<string, []>("input_55")];
            tensor<fp32, [1, ?, 256]> input_59 = linear(bias = linear_3_bias_0, weight = encoder_block_2_layer_1_DenseReluDense_wo_weight, x = input_55)[name = tensor<string, []>("linear_20")];
            tensor<fp32, [1, ?, 256]> hidden_states_43 = add(x = hidden_states_37, y = input_59)[name = tensor<string, []>("hidden_states_43")];
            tensor<fp32, []> var_17_promoted_6 = const()[name = tensor<string, []>("op_17_promoted_6"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_403 = pow(x = hidden_states_43, y = var_17_promoted_6)[name = tensor<string, []>("op_403")];
            tensor<int32, [1]> variance_13_axes_0 = const()[name = tensor<string, []>("variance_13_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_13_keep_dims_0 = const()[name = tensor<string, []>("variance_13_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_13 = reduce_mean(axes = variance_13_axes_0, keep_dims = variance_13_keep_dims_0, x = var_403)[name = tensor<string, []>("variance_13")];
            tensor<fp32, []> var_406 = const()[name = tensor<string, []>("op_406"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_407 = add(x = variance_13, y = var_406)[name = tensor<string, []>("op_407")];
            tensor<fp32, []> var_408_epsilon_0 = const()[name = tensor<string, []>("op_408_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_408 = rsqrt(epsilon = var_408_epsilon_0, x = var_407)[name = tensor<string, []>("op_408")];
            tensor<fp32, [1, ?, 256]> hidden_states_47 = mul(x = hidden_states_43, y = var_408)[name = tensor<string, []>("hidden_states_47")];
            tensor<fp32, [1, ?, 256]> hidden_states_49 = mul(x = encoder_block_3_layer_0_layer_norm_weight, y = hidden_states_47)[name = tensor<string, []>("hidden_states_49")];
            tensor<fp32, [1, ?, 384]> states_25 = linear(bias = linear_0_bias_0, weight = encoder_block_3_layer_0_SelfAttention_q_weight, x = hidden_states_49)[name = tensor<string, []>("linear_21")];
            tensor<int32, [4]> var_421 = const()[name = tensor<string, []>("op_421"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_422 = reshape(shape = var_421, x = states_25)[name = tensor<string, []>("op_422")];
            tensor<fp32, [1, ?, 384]> states_27 = linear(bias = linear_0_bias_0, weight = encoder_block_3_layer_0_SelfAttention_k_weight, x = hidden_states_49)[name = tensor<string, []>("linear_22")];
            tensor<int32, [4]> var_426 = const()[name = tensor<string, []>("op_426"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_427 = reshape(shape = var_426, x = states_27)[name = tensor<string, []>("op_427")];
            tensor<fp32, [1, ?, 384]> states_29 = linear(bias = linear_0_bias_0, weight = encoder_block_3_layer_0_SelfAttention_v_weight, x = hidden_states_49)[name = tensor<string, []>("linear_23")];
            tensor<int32, [4]> var_431 = const()[name = tensor<string, []>("op_431"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_432 = reshape(shape = var_431, x = states_29)[name = tensor<string, []>("op_432")];
            tensor<int32, [4]> value_states_7_perm_0 = const()[name = tensor<string, []>("value_states_7_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<bool, []> scores_13_transpose_x_0 = const()[name = tensor<string, []>("scores_13_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> scores_13_transpose_y_0 = const()[name = tensor<string, []>("scores_13_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_42_perm_0 = const()[name = tensor<string, []>("transpose_42_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_43_perm_0 = const()[name = tensor<string, []>("transpose_43_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp32, [1, 6, 64, ?]> transpose_43 = transpose(perm = transpose_43_perm_0, x = var_427)[name = tensor<string, []>("transpose_93")];
            tensor<fp32, [1, 6, ?, 64]> transpose_42 = transpose(perm = transpose_42_perm_0, x = var_422)[name = tensor<string, []>("transpose_94")];
            tensor<fp32, [1, 6, ?, ?]> scores_13 = matmul(transpose_x = scores_13_transpose_x_0, transpose_y = scores_13_transpose_y_0, x = transpose_42, y = transpose_43)[name = tensor<string, []>("scores_13")];
            tensor<fp32, [1, 6, ?, ?]> scores_15 = add(x = scores_13, y = position_bias)[name = tensor<string, []>("scores_15")];
            tensor<fp32, [1, 6, ?, ?]> var_438 = softmax(axis = var_21, x = scores_15)[name = tensor<string, []>("op_438")];
            tensor<bool, []> states_31_transpose_x_0 = const()[name = tensor<string, []>("states_31_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> states_31_transpose_y_0 = const()[name = tensor<string, []>("states_31_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 6, ?, 64]> value_states_7 = transpose(perm = value_states_7_perm_0, x = var_432)[name = tensor<string, []>("transpose_95")];
            tensor<fp32, [1, 6, ?, 64]> states_31 = matmul(transpose_x = states_31_transpose_x_0, transpose_y = states_31_transpose_y_0, x = var_438, y = value_states_7)[name = tensor<string, []>("states_31")];
            tensor<int32, [4]> var_442_perm_0 = const()[name = tensor<string, []>("op_442_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_444 = const()[name = tensor<string, []>("op_444"), val = tensor<int32, [3]>([1, -1, 384])];
            tensor<fp32, [1, ?, 6, 64]> var_442 = transpose(perm = var_442_perm_0, x = states_31)[name = tensor<string, []>("transpose_92")];
            tensor<fp32, [1, ?, 384]> input_65 = reshape(shape = var_444, x = var_442)[name = tensor<string, []>("input_65")];
            tensor<fp32, [1, ?, 256]> input_67 = linear(bias = linear_3_bias_0, weight = encoder_block_3_layer_0_SelfAttention_o_weight, x = input_65)[name = tensor<string, []>("linear_24")];
            tensor<fp32, [1, ?, 256]> hidden_states_51 = add(x = hidden_states_43, y = input_67)[name = tensor<string, []>("hidden_states_51")];
            tensor<fp32, []> var_17_promoted_7 = const()[name = tensor<string, []>("op_17_promoted_7"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_454 = pow(x = hidden_states_51, y = var_17_promoted_7)[name = tensor<string, []>("op_454")];
            tensor<int32, [1]> variance_15_axes_0 = const()[name = tensor<string, []>("variance_15_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_15_keep_dims_0 = const()[name = tensor<string, []>("variance_15_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_15 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = var_454)[name = tensor<string, []>("variance_15")];
            tensor<fp32, []> var_457 = const()[name = tensor<string, []>("op_457"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_458 = add(x = variance_15, y = var_457)[name = tensor<string, []>("op_458")];
            tensor<fp32, []> var_459_epsilon_0 = const()[name = tensor<string, []>("op_459_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_459 = rsqrt(epsilon = var_459_epsilon_0, x = var_458)[name = tensor<string, []>("op_459")];
            tensor<fp32, [1, ?, 256]> hidden_states_55 = mul(x = hidden_states_51, y = var_459)[name = tensor<string, []>("hidden_states_55")];
            tensor<fp32, [1, ?, 256]> input_69 = mul(x = encoder_block_3_layer_1_layer_norm_weight, y = hidden_states_55)[name = tensor<string, []>("input_69")];
            tensor<fp32, [1, ?, 1024]> input_71 = linear(bias = linear_4_bias_0, weight = encoder_block_3_layer_1_DenseReluDense_wi_0_weight, x = input_69)[name = tensor<string, []>("linear_25")];
            tensor<string, []> hidden_gelu_7_mode_0 = const()[name = tensor<string, []>("hidden_gelu_7_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp32, [1, ?, 1024]> hidden_gelu_7 = gelu(mode = hidden_gelu_7_mode_0, x = input_71)[name = tensor<string, []>("hidden_gelu_7")];
            tensor<fp32, [1, ?, 1024]> hidden_linear_7 = linear(bias = linear_4_bias_0, weight = encoder_block_3_layer_1_DenseReluDense_wi_1_weight, x = input_69)[name = tensor<string, []>("linear_26")];
            tensor<fp32, [1, ?, 1024]> input_73 = mul(x = hidden_gelu_7, y = hidden_linear_7)[name = tensor<string, []>("input_73")];
            tensor<fp32, [1, ?, 256]> input_77 = linear(bias = linear_3_bias_0, weight = encoder_block_3_layer_1_DenseReluDense_wo_weight, x = input_73)[name = tensor<string, []>("linear_27")];
            tensor<fp32, [1, ?, 256]> hidden_states_57 = add(x = hidden_states_51, y = input_77)[name = tensor<string, []>("hidden_states_57")];
            tensor<fp32, []> var_17_promoted_8 = const()[name = tensor<string, []>("op_17_promoted_8"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_498 = pow(x = hidden_states_57, y = var_17_promoted_8)[name = tensor<string, []>("op_498")];
            tensor<int32, [1]> variance_17_axes_0 = const()[name = tensor<string, []>("variance_17_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_17_keep_dims_0 = const()[name = tensor<string, []>("variance_17_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_17 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = var_498)[name = tensor<string, []>("variance_17")];
            tensor<fp32, []> var_501 = const()[name = tensor<string, []>("op_501"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_502 = add(x = variance_17, y = var_501)[name = tensor<string, []>("op_502")];
            tensor<fp32, []> var_503_epsilon_0 = const()[name = tensor<string, []>("op_503_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_503 = rsqrt(epsilon = var_503_epsilon_0, x = var_502)[name = tensor<string, []>("op_503")];
            tensor<fp32, [1, ?, 256]> hidden_states_61 = mul(x = hidden_states_57, y = var_503)[name = tensor<string, []>("hidden_states_61")];
            tensor<fp32, [1, ?, 256]> hidden_states_63 = mul(x = encoder_block_4_layer_0_layer_norm_weight, y = hidden_states_61)[name = tensor<string, []>("hidden_states_63")];
            tensor<fp32, [1, ?, 384]> states_33 = linear(bias = linear_0_bias_0, weight = encoder_block_4_layer_0_SelfAttention_q_weight, x = hidden_states_63)[name = tensor<string, []>("linear_28")];
            tensor<int32, [4]> var_516 = const()[name = tensor<string, []>("op_516"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_517 = reshape(shape = var_516, x = states_33)[name = tensor<string, []>("op_517")];
            tensor<fp32, [1, ?, 384]> states_35 = linear(bias = linear_0_bias_0, weight = encoder_block_4_layer_0_SelfAttention_k_weight, x = hidden_states_63)[name = tensor<string, []>("linear_29")];
            tensor<int32, [4]> var_521 = const()[name = tensor<string, []>("op_521"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_522 = reshape(shape = var_521, x = states_35)[name = tensor<string, []>("op_522")];
            tensor<fp32, [1, ?, 384]> states_37 = linear(bias = linear_0_bias_0, weight = encoder_block_4_layer_0_SelfAttention_v_weight, x = hidden_states_63)[name = tensor<string, []>("linear_30")];
            tensor<int32, [4]> var_526 = const()[name = tensor<string, []>("op_526"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_527 = reshape(shape = var_526, x = states_37)[name = tensor<string, []>("op_527")];
            tensor<int32, [4]> value_states_9_perm_0 = const()[name = tensor<string, []>("value_states_9_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<bool, []> scores_17_transpose_x_0 = const()[name = tensor<string, []>("scores_17_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> scores_17_transpose_y_0 = const()[name = tensor<string, []>("scores_17_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_44_perm_0 = const()[name = tensor<string, []>("transpose_44_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_45_perm_0 = const()[name = tensor<string, []>("transpose_45_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp32, [1, 6, 64, ?]> transpose_45 = transpose(perm = transpose_45_perm_0, x = var_522)[name = tensor<string, []>("transpose_89")];
            tensor<fp32, [1, 6, ?, 64]> transpose_44 = transpose(perm = transpose_44_perm_0, x = var_517)[name = tensor<string, []>("transpose_90")];
            tensor<fp32, [1, 6, ?, ?]> scores_17 = matmul(transpose_x = scores_17_transpose_x_0, transpose_y = scores_17_transpose_y_0, x = transpose_44, y = transpose_45)[name = tensor<string, []>("scores_17")];
            tensor<fp32, [1, 6, ?, ?]> scores_19 = add(x = scores_17, y = position_bias)[name = tensor<string, []>("scores_19")];
            tensor<fp32, [1, 6, ?, ?]> var_533 = softmax(axis = var_21, x = scores_19)[name = tensor<string, []>("op_533")];
            tensor<bool, []> states_39_transpose_x_0 = const()[name = tensor<string, []>("states_39_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> states_39_transpose_y_0 = const()[name = tensor<string, []>("states_39_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 6, ?, 64]> value_states_9 = transpose(perm = value_states_9_perm_0, x = var_527)[name = tensor<string, []>("transpose_91")];
            tensor<fp32, [1, 6, ?, 64]> states_39 = matmul(transpose_x = states_39_transpose_x_0, transpose_y = states_39_transpose_y_0, x = var_533, y = value_states_9)[name = tensor<string, []>("states_39")];
            tensor<int32, [4]> var_537_perm_0 = const()[name = tensor<string, []>("op_537_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_539 = const()[name = tensor<string, []>("op_539"), val = tensor<int32, [3]>([1, -1, 384])];
            tensor<fp32, [1, ?, 6, 64]> var_537 = transpose(perm = var_537_perm_0, x = states_39)[name = tensor<string, []>("transpose_88")];
            tensor<fp32, [1, ?, 384]> input_83 = reshape(shape = var_539, x = var_537)[name = tensor<string, []>("input_83")];
            tensor<fp32, [1, ?, 256]> input_85 = linear(bias = linear_3_bias_0, weight = encoder_block_4_layer_0_SelfAttention_o_weight, x = input_83)[name = tensor<string, []>("linear_31")];
            tensor<fp32, [1, ?, 256]> hidden_states_65 = add(x = hidden_states_57, y = input_85)[name = tensor<string, []>("hidden_states_65")];
            tensor<fp32, []> var_17_promoted_9 = const()[name = tensor<string, []>("op_17_promoted_9"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_549 = pow(x = hidden_states_65, y = var_17_promoted_9)[name = tensor<string, []>("op_549")];
            tensor<int32, [1]> variance_19_axes_0 = const()[name = tensor<string, []>("variance_19_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_19_keep_dims_0 = const()[name = tensor<string, []>("variance_19_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_19 = reduce_mean(axes = variance_19_axes_0, keep_dims = variance_19_keep_dims_0, x = var_549)[name = tensor<string, []>("variance_19")];
            tensor<fp32, []> var_552 = const()[name = tensor<string, []>("op_552"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_553 = add(x = variance_19, y = var_552)[name = tensor<string, []>("op_553")];
            tensor<fp32, []> var_554_epsilon_0 = const()[name = tensor<string, []>("op_554_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_554 = rsqrt(epsilon = var_554_epsilon_0, x = var_553)[name = tensor<string, []>("op_554")];
            tensor<fp32, [1, ?, 256]> hidden_states_69 = mul(x = hidden_states_65, y = var_554)[name = tensor<string, []>("hidden_states_69")];
            tensor<fp32, [1, ?, 256]> input_87 = mul(x = encoder_block_4_layer_1_layer_norm_weight, y = hidden_states_69)[name = tensor<string, []>("input_87")];
            tensor<fp32, [1, ?, 1024]> input_89 = linear(bias = linear_4_bias_0, weight = encoder_block_4_layer_1_DenseReluDense_wi_0_weight, x = input_87)[name = tensor<string, []>("linear_32")];
            tensor<string, []> hidden_gelu_9_mode_0 = const()[name = tensor<string, []>("hidden_gelu_9_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp32, [1, ?, 1024]> hidden_gelu_9 = gelu(mode = hidden_gelu_9_mode_0, x = input_89)[name = tensor<string, []>("hidden_gelu_9")];
            tensor<fp32, [1, ?, 1024]> hidden_linear_9 = linear(bias = linear_4_bias_0, weight = encoder_block_4_layer_1_DenseReluDense_wi_1_weight, x = input_87)[name = tensor<string, []>("linear_33")];
            tensor<fp32, [1, ?, 1024]> input_91 = mul(x = hidden_gelu_9, y = hidden_linear_9)[name = tensor<string, []>("input_91")];
            tensor<fp32, [1, ?, 256]> input_95 = linear(bias = linear_3_bias_0, weight = encoder_block_4_layer_1_DenseReluDense_wo_weight, x = input_91)[name = tensor<string, []>("linear_34")];
            tensor<fp32, [1, ?, 256]> hidden_states_71 = add(x = hidden_states_65, y = input_95)[name = tensor<string, []>("hidden_states_71")];
            tensor<fp32, []> var_17_promoted_10 = const()[name = tensor<string, []>("op_17_promoted_10"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_593 = pow(x = hidden_states_71, y = var_17_promoted_10)[name = tensor<string, []>("op_593")];
            tensor<int32, [1]> variance_21_axes_0 = const()[name = tensor<string, []>("variance_21_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_21_keep_dims_0 = const()[name = tensor<string, []>("variance_21_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_21 = reduce_mean(axes = variance_21_axes_0, keep_dims = variance_21_keep_dims_0, x = var_593)[name = tensor<string, []>("variance_21")];
            tensor<fp32, []> var_596 = const()[name = tensor<string, []>("op_596"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_597 = add(x = variance_21, y = var_596)[name = tensor<string, []>("op_597")];
            tensor<fp32, []> var_598_epsilon_0 = const()[name = tensor<string, []>("op_598_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_598 = rsqrt(epsilon = var_598_epsilon_0, x = var_597)[name = tensor<string, []>("op_598")];
            tensor<fp32, [1, ?, 256]> hidden_states_75 = mul(x = hidden_states_71, y = var_598)[name = tensor<string, []>("hidden_states_75")];
            tensor<fp32, [1, ?, 256]> hidden_states_77 = mul(x = encoder_block_5_layer_0_layer_norm_weight, y = hidden_states_75)[name = tensor<string, []>("hidden_states_77")];
            tensor<fp32, [1, ?, 384]> states_41 = linear(bias = linear_0_bias_0, weight = encoder_block_5_layer_0_SelfAttention_q_weight, x = hidden_states_77)[name = tensor<string, []>("linear_35")];
            tensor<int32, [4]> var_611 = const()[name = tensor<string, []>("op_611"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_612 = reshape(shape = var_611, x = states_41)[name = tensor<string, []>("op_612")];
            tensor<fp32, [1, ?, 384]> states_43 = linear(bias = linear_0_bias_0, weight = encoder_block_5_layer_0_SelfAttention_k_weight, x = hidden_states_77)[name = tensor<string, []>("linear_36")];
            tensor<int32, [4]> var_616 = const()[name = tensor<string, []>("op_616"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_617 = reshape(shape = var_616, x = states_43)[name = tensor<string, []>("op_617")];
            tensor<fp32, [1, ?, 384]> states_45 = linear(bias = linear_0_bias_0, weight = encoder_block_5_layer_0_SelfAttention_v_weight, x = hidden_states_77)[name = tensor<string, []>("linear_37")];
            tensor<int32, [4]> var_621 = const()[name = tensor<string, []>("op_621"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_622 = reshape(shape = var_621, x = states_45)[name = tensor<string, []>("op_622")];
            tensor<int32, [4]> value_states_11_perm_0 = const()[name = tensor<string, []>("value_states_11_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<bool, []> scores_21_transpose_x_0 = const()[name = tensor<string, []>("scores_21_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> scores_21_transpose_y_0 = const()[name = tensor<string, []>("scores_21_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_46_perm_0 = const()[name = tensor<string, []>("transpose_46_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_47_perm_0 = const()[name = tensor<string, []>("transpose_47_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp32, [1, 6, 64, ?]> transpose_47 = transpose(perm = transpose_47_perm_0, x = var_617)[name = tensor<string, []>("transpose_85")];
            tensor<fp32, [1, 6, ?, 64]> transpose_46 = transpose(perm = transpose_46_perm_0, x = var_612)[name = tensor<string, []>("transpose_86")];
            tensor<fp32, [1, 6, ?, ?]> scores_21 = matmul(transpose_x = scores_21_transpose_x_0, transpose_y = scores_21_transpose_y_0, x = transpose_46, y = transpose_47)[name = tensor<string, []>("scores_21")];
            tensor<fp32, [1, 6, ?, ?]> scores_23 = add(x = scores_21, y = position_bias)[name = tensor<string, []>("scores_23")];
            tensor<fp32, [1, 6, ?, ?]> var_628 = softmax(axis = var_21, x = scores_23)[name = tensor<string, []>("op_628")];
            tensor<bool, []> states_47_transpose_x_0 = const()[name = tensor<string, []>("states_47_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> states_47_transpose_y_0 = const()[name = tensor<string, []>("states_47_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 6, ?, 64]> value_states_11 = transpose(perm = value_states_11_perm_0, x = var_622)[name = tensor<string, []>("transpose_87")];
            tensor<fp32, [1, 6, ?, 64]> states_47 = matmul(transpose_x = states_47_transpose_x_0, transpose_y = states_47_transpose_y_0, x = var_628, y = value_states_11)[name = tensor<string, []>("states_47")];
            tensor<int32, [4]> var_632_perm_0 = const()[name = tensor<string, []>("op_632_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_634 = const()[name = tensor<string, []>("op_634"), val = tensor<int32, [3]>([1, -1, 384])];
            tensor<fp32, [1, ?, 6, 64]> var_632 = transpose(perm = var_632_perm_0, x = states_47)[name = tensor<string, []>("transpose_84")];
            tensor<fp32, [1, ?, 384]> input_101 = reshape(shape = var_634, x = var_632)[name = tensor<string, []>("input_101")];
            tensor<fp32, [1, ?, 256]> input_103 = linear(bias = linear_3_bias_0, weight = encoder_block_5_layer_0_SelfAttention_o_weight, x = input_101)[name = tensor<string, []>("linear_38")];
            tensor<fp32, [1, ?, 256]> hidden_states_79 = add(x = hidden_states_71, y = input_103)[name = tensor<string, []>("hidden_states_79")];
            tensor<fp32, []> var_17_promoted_11 = const()[name = tensor<string, []>("op_17_promoted_11"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_644 = pow(x = hidden_states_79, y = var_17_promoted_11)[name = tensor<string, []>("op_644")];
            tensor<int32, [1]> variance_23_axes_0 = const()[name = tensor<string, []>("variance_23_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_23_keep_dims_0 = const()[name = tensor<string, []>("variance_23_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_23 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = var_644)[name = tensor<string, []>("variance_23")];
            tensor<fp32, []> var_647 = const()[name = tensor<string, []>("op_647"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_648 = add(x = variance_23, y = var_647)[name = tensor<string, []>("op_648")];
            tensor<fp32, []> var_649_epsilon_0 = const()[name = tensor<string, []>("op_649_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_649 = rsqrt(epsilon = var_649_epsilon_0, x = var_648)[name = tensor<string, []>("op_649")];
            tensor<fp32, [1, ?, 256]> hidden_states_83 = mul(x = hidden_states_79, y = var_649)[name = tensor<string, []>("hidden_states_83")];
            tensor<fp32, [1, ?, 256]> input_105 = mul(x = encoder_block_5_layer_1_layer_norm_weight, y = hidden_states_83)[name = tensor<string, []>("input_105")];
            tensor<fp32, [1, ?, 1024]> input_107 = linear(bias = linear_4_bias_0, weight = encoder_block_5_layer_1_DenseReluDense_wi_0_weight, x = input_105)[name = tensor<string, []>("linear_39")];
            tensor<string, []> hidden_gelu_11_mode_0 = const()[name = tensor<string, []>("hidden_gelu_11_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp32, [1, ?, 1024]> hidden_gelu_11 = gelu(mode = hidden_gelu_11_mode_0, x = input_107)[name = tensor<string, []>("hidden_gelu_11")];
            tensor<fp32, [1, ?, 1024]> hidden_linear_11 = linear(bias = linear_4_bias_0, weight = encoder_block_5_layer_1_DenseReluDense_wi_1_weight, x = input_105)[name = tensor<string, []>("linear_40")];
            tensor<fp32, [1, ?, 1024]> input_109 = mul(x = hidden_gelu_11, y = hidden_linear_11)[name = tensor<string, []>("input_109")];
            tensor<fp32, [1, ?, 256]> input_113 = linear(bias = linear_3_bias_0, weight = encoder_block_5_layer_1_DenseReluDense_wo_weight, x = input_109)[name = tensor<string, []>("linear_41")];
            tensor<fp32, [1, ?, 256]> hidden_states_85 = add(x = hidden_states_79, y = input_113)[name = tensor<string, []>("hidden_states_85")];
            tensor<fp32, []> var_17_promoted_12 = const()[name = tensor<string, []>("op_17_promoted_12"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_688 = pow(x = hidden_states_85, y = var_17_promoted_12)[name = tensor<string, []>("op_688")];
            tensor<int32, [1]> variance_25_axes_0 = const()[name = tensor<string, []>("variance_25_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_25_keep_dims_0 = const()[name = tensor<string, []>("variance_25_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_25 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = var_688)[name = tensor<string, []>("variance_25")];
            tensor<fp32, []> var_691 = const()[name = tensor<string, []>("op_691"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_692 = add(x = variance_25, y = var_691)[name = tensor<string, []>("op_692")];
            tensor<fp32, []> var_693_epsilon_0 = const()[name = tensor<string, []>("op_693_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_693 = rsqrt(epsilon = var_693_epsilon_0, x = var_692)[name = tensor<string, []>("op_693")];
            tensor<fp32, [1, ?, 256]> hidden_states_89 = mul(x = hidden_states_85, y = var_693)[name = tensor<string, []>("hidden_states_89")];
            tensor<fp32, [1, ?, 256]> hidden_states_91 = mul(x = encoder_block_6_layer_0_layer_norm_weight, y = hidden_states_89)[name = tensor<string, []>("hidden_states_91")];
            tensor<fp32, [1, ?, 384]> states_49 = linear(bias = linear_0_bias_0, weight = encoder_block_6_layer_0_SelfAttention_q_weight, x = hidden_states_91)[name = tensor<string, []>("linear_42")];
            tensor<int32, [4]> var_706 = const()[name = tensor<string, []>("op_706"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_707 = reshape(shape = var_706, x = states_49)[name = tensor<string, []>("op_707")];
            tensor<fp32, [1, ?, 384]> states_51 = linear(bias = linear_0_bias_0, weight = encoder_block_6_layer_0_SelfAttention_k_weight, x = hidden_states_91)[name = tensor<string, []>("linear_43")];
            tensor<int32, [4]> var_711 = const()[name = tensor<string, []>("op_711"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_712 = reshape(shape = var_711, x = states_51)[name = tensor<string, []>("op_712")];
            tensor<fp32, [1, ?, 384]> states_53 = linear(bias = linear_0_bias_0, weight = encoder_block_6_layer_0_SelfAttention_v_weight, x = hidden_states_91)[name = tensor<string, []>("linear_44")];
            tensor<int32, [4]> var_716 = const()[name = tensor<string, []>("op_716"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_717 = reshape(shape = var_716, x = states_53)[name = tensor<string, []>("op_717")];
            tensor<int32, [4]> value_states_13_perm_0 = const()[name = tensor<string, []>("value_states_13_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<bool, []> scores_25_transpose_x_0 = const()[name = tensor<string, []>("scores_25_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> scores_25_transpose_y_0 = const()[name = tensor<string, []>("scores_25_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_48_perm_0 = const()[name = tensor<string, []>("transpose_48_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_49_perm_0 = const()[name = tensor<string, []>("transpose_49_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp32, [1, 6, 64, ?]> transpose_49 = transpose(perm = transpose_49_perm_0, x = var_712)[name = tensor<string, []>("transpose_81")];
            tensor<fp32, [1, 6, ?, 64]> transpose_48 = transpose(perm = transpose_48_perm_0, x = var_707)[name = tensor<string, []>("transpose_82")];
            tensor<fp32, [1, 6, ?, ?]> scores_25 = matmul(transpose_x = scores_25_transpose_x_0, transpose_y = scores_25_transpose_y_0, x = transpose_48, y = transpose_49)[name = tensor<string, []>("scores_25")];
            tensor<fp32, [1, 6, ?, ?]> scores_27 = add(x = scores_25, y = position_bias)[name = tensor<string, []>("scores_27")];
            tensor<fp32, [1, 6, ?, ?]> var_723 = softmax(axis = var_21, x = scores_27)[name = tensor<string, []>("op_723")];
            tensor<bool, []> states_55_transpose_x_0 = const()[name = tensor<string, []>("states_55_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> states_55_transpose_y_0 = const()[name = tensor<string, []>("states_55_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 6, ?, 64]> value_states_13 = transpose(perm = value_states_13_perm_0, x = var_717)[name = tensor<string, []>("transpose_83")];
            tensor<fp32, [1, 6, ?, 64]> states_55 = matmul(transpose_x = states_55_transpose_x_0, transpose_y = states_55_transpose_y_0, x = var_723, y = value_states_13)[name = tensor<string, []>("states_55")];
            tensor<int32, [4]> var_727_perm_0 = const()[name = tensor<string, []>("op_727_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_729 = const()[name = tensor<string, []>("op_729"), val = tensor<int32, [3]>([1, -1, 384])];
            tensor<fp32, [1, ?, 6, 64]> var_727 = transpose(perm = var_727_perm_0, x = states_55)[name = tensor<string, []>("transpose_80")];
            tensor<fp32, [1, ?, 384]> input_119 = reshape(shape = var_729, x = var_727)[name = tensor<string, []>("input_119")];
            tensor<fp32, [1, ?, 256]> input_121 = linear(bias = linear_3_bias_0, weight = encoder_block_6_layer_0_SelfAttention_o_weight, x = input_119)[name = tensor<string, []>("linear_45")];
            tensor<fp32, [1, ?, 256]> hidden_states_93 = add(x = hidden_states_85, y = input_121)[name = tensor<string, []>("hidden_states_93")];
            tensor<fp32, []> var_17_promoted_13 = const()[name = tensor<string, []>("op_17_promoted_13"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_739 = pow(x = hidden_states_93, y = var_17_promoted_13)[name = tensor<string, []>("op_739")];
            tensor<int32, [1]> variance_27_axes_0 = const()[name = tensor<string, []>("variance_27_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_27_keep_dims_0 = const()[name = tensor<string, []>("variance_27_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_27 = reduce_mean(axes = variance_27_axes_0, keep_dims = variance_27_keep_dims_0, x = var_739)[name = tensor<string, []>("variance_27")];
            tensor<fp32, []> var_742 = const()[name = tensor<string, []>("op_742"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_743 = add(x = variance_27, y = var_742)[name = tensor<string, []>("op_743")];
            tensor<fp32, []> var_744_epsilon_0 = const()[name = tensor<string, []>("op_744_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_744 = rsqrt(epsilon = var_744_epsilon_0, x = var_743)[name = tensor<string, []>("op_744")];
            tensor<fp32, [1, ?, 256]> hidden_states_97 = mul(x = hidden_states_93, y = var_744)[name = tensor<string, []>("hidden_states_97")];
            tensor<fp32, [1, ?, 256]> input_123 = mul(x = encoder_block_6_layer_1_layer_norm_weight, y = hidden_states_97)[name = tensor<string, []>("input_123")];
            tensor<fp32, [1, ?, 1024]> input_125 = linear(bias = linear_4_bias_0, weight = encoder_block_6_layer_1_DenseReluDense_wi_0_weight, x = input_123)[name = tensor<string, []>("linear_46")];
            tensor<string, []> hidden_gelu_13_mode_0 = const()[name = tensor<string, []>("hidden_gelu_13_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp32, [1, ?, 1024]> hidden_gelu_13 = gelu(mode = hidden_gelu_13_mode_0, x = input_125)[name = tensor<string, []>("hidden_gelu_13")];
            tensor<fp32, [1, ?, 1024]> hidden_linear_13 = linear(bias = linear_4_bias_0, weight = encoder_block_6_layer_1_DenseReluDense_wi_1_weight, x = input_123)[name = tensor<string, []>("linear_47")];
            tensor<fp32, [1, ?, 1024]> input_127 = mul(x = hidden_gelu_13, y = hidden_linear_13)[name = tensor<string, []>("input_127")];
            tensor<fp32, [1, ?, 256]> input_131 = linear(bias = linear_3_bias_0, weight = encoder_block_6_layer_1_DenseReluDense_wo_weight, x = input_127)[name = tensor<string, []>("linear_48")];
            tensor<fp32, [1, ?, 256]> hidden_states_99 = add(x = hidden_states_93, y = input_131)[name = tensor<string, []>("hidden_states_99")];
            tensor<fp32, []> var_17_promoted_14 = const()[name = tensor<string, []>("op_17_promoted_14"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_783 = pow(x = hidden_states_99, y = var_17_promoted_14)[name = tensor<string, []>("op_783")];
            tensor<int32, [1]> variance_29_axes_0 = const()[name = tensor<string, []>("variance_29_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_29_keep_dims_0 = const()[name = tensor<string, []>("variance_29_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_29 = reduce_mean(axes = variance_29_axes_0, keep_dims = variance_29_keep_dims_0, x = var_783)[name = tensor<string, []>("variance_29")];
            tensor<fp32, []> var_786 = const()[name = tensor<string, []>("op_786"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_787 = add(x = variance_29, y = var_786)[name = tensor<string, []>("op_787")];
            tensor<fp32, []> var_788_epsilon_0 = const()[name = tensor<string, []>("op_788_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_788 = rsqrt(epsilon = var_788_epsilon_0, x = var_787)[name = tensor<string, []>("op_788")];
            tensor<fp32, [1, ?, 256]> hidden_states_103 = mul(x = hidden_states_99, y = var_788)[name = tensor<string, []>("hidden_states_103")];
            tensor<fp32, [1, ?, 256]> hidden_states_105 = mul(x = encoder_block_7_layer_0_layer_norm_weight, y = hidden_states_103)[name = tensor<string, []>("hidden_states_105")];
            tensor<fp32, [1, ?, 384]> states_57 = linear(bias = linear_0_bias_0, weight = encoder_block_7_layer_0_SelfAttention_q_weight, x = hidden_states_105)[name = tensor<string, []>("linear_49")];
            tensor<int32, [4]> var_801 = const()[name = tensor<string, []>("op_801"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_802 = reshape(shape = var_801, x = states_57)[name = tensor<string, []>("op_802")];
            tensor<fp32, [1, ?, 384]> states_59 = linear(bias = linear_0_bias_0, weight = encoder_block_7_layer_0_SelfAttention_k_weight, x = hidden_states_105)[name = tensor<string, []>("linear_50")];
            tensor<int32, [4]> var_806 = const()[name = tensor<string, []>("op_806"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_807 = reshape(shape = var_806, x = states_59)[name = tensor<string, []>("op_807")];
            tensor<fp32, [1, ?, 384]> states_61 = linear(bias = linear_0_bias_0, weight = encoder_block_7_layer_0_SelfAttention_v_weight, x = hidden_states_105)[name = tensor<string, []>("linear_51")];
            tensor<int32, [4]> var_811 = const()[name = tensor<string, []>("op_811"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_812 = reshape(shape = var_811, x = states_61)[name = tensor<string, []>("op_812")];
            tensor<int32, [4]> value_states_15_perm_0 = const()[name = tensor<string, []>("value_states_15_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<bool, []> scores_29_transpose_x_0 = const()[name = tensor<string, []>("scores_29_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> scores_29_transpose_y_0 = const()[name = tensor<string, []>("scores_29_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_50_perm_0 = const()[name = tensor<string, []>("transpose_50_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_51_perm_0 = const()[name = tensor<string, []>("transpose_51_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp32, [1, 6, 64, ?]> transpose_51 = transpose(perm = transpose_51_perm_0, x = var_807)[name = tensor<string, []>("transpose_77")];
            tensor<fp32, [1, 6, ?, 64]> transpose_50 = transpose(perm = transpose_50_perm_0, x = var_802)[name = tensor<string, []>("transpose_78")];
            tensor<fp32, [1, 6, ?, ?]> scores_29 = matmul(transpose_x = scores_29_transpose_x_0, transpose_y = scores_29_transpose_y_0, x = transpose_50, y = transpose_51)[name = tensor<string, []>("scores_29")];
            tensor<fp32, [1, 6, ?, ?]> scores_31 = add(x = scores_29, y = position_bias)[name = tensor<string, []>("scores_31")];
            tensor<fp32, [1, 6, ?, ?]> var_818 = softmax(axis = var_21, x = scores_31)[name = tensor<string, []>("op_818")];
            tensor<bool, []> states_63_transpose_x_0 = const()[name = tensor<string, []>("states_63_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> states_63_transpose_y_0 = const()[name = tensor<string, []>("states_63_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 6, ?, 64]> value_states_15 = transpose(perm = value_states_15_perm_0, x = var_812)[name = tensor<string, []>("transpose_79")];
            tensor<fp32, [1, 6, ?, 64]> states_63 = matmul(transpose_x = states_63_transpose_x_0, transpose_y = states_63_transpose_y_0, x = var_818, y = value_states_15)[name = tensor<string, []>("states_63")];
            tensor<int32, [4]> var_822_perm_0 = const()[name = tensor<string, []>("op_822_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_824 = const()[name = tensor<string, []>("op_824"), val = tensor<int32, [3]>([1, -1, 384])];
            tensor<fp32, [1, ?, 6, 64]> var_822 = transpose(perm = var_822_perm_0, x = states_63)[name = tensor<string, []>("transpose_76")];
            tensor<fp32, [1, ?, 384]> input_137 = reshape(shape = var_824, x = var_822)[name = tensor<string, []>("input_137")];
            tensor<fp32, [1, ?, 256]> input_139 = linear(bias = linear_3_bias_0, weight = encoder_block_7_layer_0_SelfAttention_o_weight, x = input_137)[name = tensor<string, []>("linear_52")];
            tensor<fp32, [1, ?, 256]> hidden_states_107 = add(x = hidden_states_99, y = input_139)[name = tensor<string, []>("hidden_states_107")];
            tensor<fp32, []> var_17_promoted_15 = const()[name = tensor<string, []>("op_17_promoted_15"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_834 = pow(x = hidden_states_107, y = var_17_promoted_15)[name = tensor<string, []>("op_834")];
            tensor<int32, [1]> variance_31_axes_0 = const()[name = tensor<string, []>("variance_31_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_31_keep_dims_0 = const()[name = tensor<string, []>("variance_31_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_31 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = var_834)[name = tensor<string, []>("variance_31")];
            tensor<fp32, []> var_837 = const()[name = tensor<string, []>("op_837"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_838 = add(x = variance_31, y = var_837)[name = tensor<string, []>("op_838")];
            tensor<fp32, []> var_839_epsilon_0 = const()[name = tensor<string, []>("op_839_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_839 = rsqrt(epsilon = var_839_epsilon_0, x = var_838)[name = tensor<string, []>("op_839")];
            tensor<fp32, [1, ?, 256]> hidden_states_111 = mul(x = hidden_states_107, y = var_839)[name = tensor<string, []>("hidden_states_111")];
            tensor<fp32, [1, ?, 256]> input_141 = mul(x = encoder_block_7_layer_1_layer_norm_weight, y = hidden_states_111)[name = tensor<string, []>("input_141")];
            tensor<fp32, [1, ?, 1024]> input_143 = linear(bias = linear_4_bias_0, weight = encoder_block_7_layer_1_DenseReluDense_wi_0_weight, x = input_141)[name = tensor<string, []>("linear_53")];
            tensor<string, []> hidden_gelu_15_mode_0 = const()[name = tensor<string, []>("hidden_gelu_15_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp32, [1, ?, 1024]> hidden_gelu_15 = gelu(mode = hidden_gelu_15_mode_0, x = input_143)[name = tensor<string, []>("hidden_gelu_15")];
            tensor<fp32, [1, ?, 1024]> hidden_linear_15 = linear(bias = linear_4_bias_0, weight = encoder_block_7_layer_1_DenseReluDense_wi_1_weight, x = input_141)[name = tensor<string, []>("linear_54")];
            tensor<fp32, [1, ?, 1024]> input_145 = mul(x = hidden_gelu_15, y = hidden_linear_15)[name = tensor<string, []>("input_145")];
            tensor<fp32, [1, ?, 256]> input_149 = linear(bias = linear_3_bias_0, weight = encoder_block_7_layer_1_DenseReluDense_wo_weight, x = input_145)[name = tensor<string, []>("linear_55")];
            tensor<fp32, [1, ?, 256]> hidden_states_113 = add(x = hidden_states_107, y = input_149)[name = tensor<string, []>("hidden_states_113")];
            tensor<fp32, []> var_17_promoted_16 = const()[name = tensor<string, []>("op_17_promoted_16"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_878 = pow(x = hidden_states_113, y = var_17_promoted_16)[name = tensor<string, []>("op_878")];
            tensor<int32, [1]> variance_33_axes_0 = const()[name = tensor<string, []>("variance_33_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_33_keep_dims_0 = const()[name = tensor<string, []>("variance_33_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_33 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = var_878)[name = tensor<string, []>("variance_33")];
            tensor<fp32, []> var_881 = const()[name = tensor<string, []>("op_881"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_882 = add(x = variance_33, y = var_881)[name = tensor<string, []>("op_882")];
            tensor<fp32, []> var_883_epsilon_0 = const()[name = tensor<string, []>("op_883_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_883 = rsqrt(epsilon = var_883_epsilon_0, x = var_882)[name = tensor<string, []>("op_883")];
            tensor<fp32, [1, ?, 256]> hidden_states_117 = mul(x = hidden_states_113, y = var_883)[name = tensor<string, []>("hidden_states_117")];
            tensor<fp32, [1, ?, 256]> hidden_states_119 = mul(x = encoder_block_8_layer_0_layer_norm_weight, y = hidden_states_117)[name = tensor<string, []>("hidden_states_119")];
            tensor<fp32, [1, ?, 384]> states_65 = linear(bias = linear_0_bias_0, weight = encoder_block_8_layer_0_SelfAttention_q_weight, x = hidden_states_119)[name = tensor<string, []>("linear_56")];
            tensor<int32, [4]> var_896 = const()[name = tensor<string, []>("op_896"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_897 = reshape(shape = var_896, x = states_65)[name = tensor<string, []>("op_897")];
            tensor<fp32, [1, ?, 384]> states_67 = linear(bias = linear_0_bias_0, weight = encoder_block_8_layer_0_SelfAttention_k_weight, x = hidden_states_119)[name = tensor<string, []>("linear_57")];
            tensor<int32, [4]> var_901 = const()[name = tensor<string, []>("op_901"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_902 = reshape(shape = var_901, x = states_67)[name = tensor<string, []>("op_902")];
            tensor<fp32, [1, ?, 384]> states_69 = linear(bias = linear_0_bias_0, weight = encoder_block_8_layer_0_SelfAttention_v_weight, x = hidden_states_119)[name = tensor<string, []>("linear_58")];
            tensor<int32, [4]> var_906 = const()[name = tensor<string, []>("op_906"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_907 = reshape(shape = var_906, x = states_69)[name = tensor<string, []>("op_907")];
            tensor<int32, [4]> value_states_17_perm_0 = const()[name = tensor<string, []>("value_states_17_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<bool, []> scores_33_transpose_x_0 = const()[name = tensor<string, []>("scores_33_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> scores_33_transpose_y_0 = const()[name = tensor<string, []>("scores_33_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_52_perm_0 = const()[name = tensor<string, []>("transpose_52_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_53_perm_0 = const()[name = tensor<string, []>("transpose_53_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp32, [1, 6, 64, ?]> transpose_53 = transpose(perm = transpose_53_perm_0, x = var_902)[name = tensor<string, []>("transpose_73")];
            tensor<fp32, [1, 6, ?, 64]> transpose_52 = transpose(perm = transpose_52_perm_0, x = var_897)[name = tensor<string, []>("transpose_74")];
            tensor<fp32, [1, 6, ?, ?]> scores_33 = matmul(transpose_x = scores_33_transpose_x_0, transpose_y = scores_33_transpose_y_0, x = transpose_52, y = transpose_53)[name = tensor<string, []>("scores_33")];
            tensor<fp32, [1, 6, ?, ?]> scores_35 = add(x = scores_33, y = position_bias)[name = tensor<string, []>("scores_35")];
            tensor<fp32, [1, 6, ?, ?]> var_913 = softmax(axis = var_21, x = scores_35)[name = tensor<string, []>("op_913")];
            tensor<bool, []> states_71_transpose_x_0 = const()[name = tensor<string, []>("states_71_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> states_71_transpose_y_0 = const()[name = tensor<string, []>("states_71_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 6, ?, 64]> value_states_17 = transpose(perm = value_states_17_perm_0, x = var_907)[name = tensor<string, []>("transpose_75")];
            tensor<fp32, [1, 6, ?, 64]> states_71 = matmul(transpose_x = states_71_transpose_x_0, transpose_y = states_71_transpose_y_0, x = var_913, y = value_states_17)[name = tensor<string, []>("states_71")];
            tensor<int32, [4]> var_917_perm_0 = const()[name = tensor<string, []>("op_917_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_919 = const()[name = tensor<string, []>("op_919"), val = tensor<int32, [3]>([1, -1, 384])];
            tensor<fp32, [1, ?, 6, 64]> var_917 = transpose(perm = var_917_perm_0, x = states_71)[name = tensor<string, []>("transpose_72")];
            tensor<fp32, [1, ?, 384]> input_155 = reshape(shape = var_919, x = var_917)[name = tensor<string, []>("input_155")];
            tensor<fp32, [1, ?, 256]> input_157 = linear(bias = linear_3_bias_0, weight = encoder_block_8_layer_0_SelfAttention_o_weight, x = input_155)[name = tensor<string, []>("linear_59")];
            tensor<fp32, [1, ?, 256]> hidden_states_121 = add(x = hidden_states_113, y = input_157)[name = tensor<string, []>("hidden_states_121")];
            tensor<fp32, []> var_17_promoted_17 = const()[name = tensor<string, []>("op_17_promoted_17"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_929 = pow(x = hidden_states_121, y = var_17_promoted_17)[name = tensor<string, []>("op_929")];
            tensor<int32, [1]> variance_35_axes_0 = const()[name = tensor<string, []>("variance_35_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_35_keep_dims_0 = const()[name = tensor<string, []>("variance_35_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_35 = reduce_mean(axes = variance_35_axes_0, keep_dims = variance_35_keep_dims_0, x = var_929)[name = tensor<string, []>("variance_35")];
            tensor<fp32, []> var_932 = const()[name = tensor<string, []>("op_932"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_933 = add(x = variance_35, y = var_932)[name = tensor<string, []>("op_933")];
            tensor<fp32, []> var_934_epsilon_0 = const()[name = tensor<string, []>("op_934_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_934 = rsqrt(epsilon = var_934_epsilon_0, x = var_933)[name = tensor<string, []>("op_934")];
            tensor<fp32, [1, ?, 256]> hidden_states_125 = mul(x = hidden_states_121, y = var_934)[name = tensor<string, []>("hidden_states_125")];
            tensor<fp32, [1, ?, 256]> input_159 = mul(x = encoder_block_8_layer_1_layer_norm_weight, y = hidden_states_125)[name = tensor<string, []>("input_159")];
            tensor<fp32, [1, ?, 1024]> input_161 = linear(bias = linear_4_bias_0, weight = encoder_block_8_layer_1_DenseReluDense_wi_0_weight, x = input_159)[name = tensor<string, []>("linear_60")];
            tensor<string, []> hidden_gelu_17_mode_0 = const()[name = tensor<string, []>("hidden_gelu_17_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp32, [1, ?, 1024]> hidden_gelu_17 = gelu(mode = hidden_gelu_17_mode_0, x = input_161)[name = tensor<string, []>("hidden_gelu_17")];
            tensor<fp32, [1, ?, 1024]> hidden_linear_17 = linear(bias = linear_4_bias_0, weight = encoder_block_8_layer_1_DenseReluDense_wi_1_weight, x = input_159)[name = tensor<string, []>("linear_61")];
            tensor<fp32, [1, ?, 1024]> input_163 = mul(x = hidden_gelu_17, y = hidden_linear_17)[name = tensor<string, []>("input_163")];
            tensor<fp32, [1, ?, 256]> input_167 = linear(bias = linear_3_bias_0, weight = encoder_block_8_layer_1_DenseReluDense_wo_weight, x = input_163)[name = tensor<string, []>("linear_62")];
            tensor<fp32, [1, ?, 256]> hidden_states_127 = add(x = hidden_states_121, y = input_167)[name = tensor<string, []>("hidden_states_127")];
            tensor<fp32, []> var_17_promoted_18 = const()[name = tensor<string, []>("op_17_promoted_18"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_973 = pow(x = hidden_states_127, y = var_17_promoted_18)[name = tensor<string, []>("op_973")];
            tensor<int32, [1]> variance_37_axes_0 = const()[name = tensor<string, []>("variance_37_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_37_keep_dims_0 = const()[name = tensor<string, []>("variance_37_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_37 = reduce_mean(axes = variance_37_axes_0, keep_dims = variance_37_keep_dims_0, x = var_973)[name = tensor<string, []>("variance_37")];
            tensor<fp32, []> var_976 = const()[name = tensor<string, []>("op_976"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_977 = add(x = variance_37, y = var_976)[name = tensor<string, []>("op_977")];
            tensor<fp32, []> var_978_epsilon_0 = const()[name = tensor<string, []>("op_978_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_978 = rsqrt(epsilon = var_978_epsilon_0, x = var_977)[name = tensor<string, []>("op_978")];
            tensor<fp32, [1, ?, 256]> hidden_states_131 = mul(x = hidden_states_127, y = var_978)[name = tensor<string, []>("hidden_states_131")];
            tensor<fp32, [1, ?, 256]> hidden_states_133 = mul(x = encoder_block_9_layer_0_layer_norm_weight, y = hidden_states_131)[name = tensor<string, []>("hidden_states_133")];
            tensor<fp32, [1, ?, 384]> states_73 = linear(bias = linear_0_bias_0, weight = encoder_block_9_layer_0_SelfAttention_q_weight, x = hidden_states_133)[name = tensor<string, []>("linear_63")];
            tensor<int32, [4]> var_991 = const()[name = tensor<string, []>("op_991"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_992 = reshape(shape = var_991, x = states_73)[name = tensor<string, []>("op_992")];
            tensor<fp32, [1, ?, 384]> states_75 = linear(bias = linear_0_bias_0, weight = encoder_block_9_layer_0_SelfAttention_k_weight, x = hidden_states_133)[name = tensor<string, []>("linear_64")];
            tensor<int32, [4]> var_996 = const()[name = tensor<string, []>("op_996"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_997 = reshape(shape = var_996, x = states_75)[name = tensor<string, []>("op_997")];
            tensor<fp32, [1, ?, 384]> states_77 = linear(bias = linear_0_bias_0, weight = encoder_block_9_layer_0_SelfAttention_v_weight, x = hidden_states_133)[name = tensor<string, []>("linear_65")];
            tensor<int32, [4]> var_1001 = const()[name = tensor<string, []>("op_1001"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_1002 = reshape(shape = var_1001, x = states_77)[name = tensor<string, []>("op_1002")];
            tensor<int32, [4]> value_states_19_perm_0 = const()[name = tensor<string, []>("value_states_19_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<bool, []> scores_37_transpose_x_0 = const()[name = tensor<string, []>("scores_37_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> scores_37_transpose_y_0 = const()[name = tensor<string, []>("scores_37_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_54_perm_0 = const()[name = tensor<string, []>("transpose_54_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_55_perm_0 = const()[name = tensor<string, []>("transpose_55_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp32, [1, 6, 64, ?]> transpose_55 = transpose(perm = transpose_55_perm_0, x = var_997)[name = tensor<string, []>("transpose_69")];
            tensor<fp32, [1, 6, ?, 64]> transpose_54 = transpose(perm = transpose_54_perm_0, x = var_992)[name = tensor<string, []>("transpose_70")];
            tensor<fp32, [1, 6, ?, ?]> scores_37 = matmul(transpose_x = scores_37_transpose_x_0, transpose_y = scores_37_transpose_y_0, x = transpose_54, y = transpose_55)[name = tensor<string, []>("scores_37")];
            tensor<fp32, [1, 6, ?, ?]> scores_39 = add(x = scores_37, y = position_bias)[name = tensor<string, []>("scores_39")];
            tensor<fp32, [1, 6, ?, ?]> var_1008 = softmax(axis = var_21, x = scores_39)[name = tensor<string, []>("op_1008")];
            tensor<bool, []> states_79_transpose_x_0 = const()[name = tensor<string, []>("states_79_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> states_79_transpose_y_0 = const()[name = tensor<string, []>("states_79_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 6, ?, 64]> value_states_19 = transpose(perm = value_states_19_perm_0, x = var_1002)[name = tensor<string, []>("transpose_71")];
            tensor<fp32, [1, 6, ?, 64]> states_79 = matmul(transpose_x = states_79_transpose_x_0, transpose_y = states_79_transpose_y_0, x = var_1008, y = value_states_19)[name = tensor<string, []>("states_79")];
            tensor<int32, [4]> var_1012_perm_0 = const()[name = tensor<string, []>("op_1012_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1014 = const()[name = tensor<string, []>("op_1014"), val = tensor<int32, [3]>([1, -1, 384])];
            tensor<fp32, [1, ?, 6, 64]> var_1012 = transpose(perm = var_1012_perm_0, x = states_79)[name = tensor<string, []>("transpose_68")];
            tensor<fp32, [1, ?, 384]> input_173 = reshape(shape = var_1014, x = var_1012)[name = tensor<string, []>("input_173")];
            tensor<fp32, [1, ?, 256]> input_175 = linear(bias = linear_3_bias_0, weight = encoder_block_9_layer_0_SelfAttention_o_weight, x = input_173)[name = tensor<string, []>("linear_66")];
            tensor<fp32, [1, ?, 256]> hidden_states_135 = add(x = hidden_states_127, y = input_175)[name = tensor<string, []>("hidden_states_135")];
            tensor<fp32, []> var_17_promoted_19 = const()[name = tensor<string, []>("op_17_promoted_19"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_1024 = pow(x = hidden_states_135, y = var_17_promoted_19)[name = tensor<string, []>("op_1024")];
            tensor<int32, [1]> variance_39_axes_0 = const()[name = tensor<string, []>("variance_39_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_39_keep_dims_0 = const()[name = tensor<string, []>("variance_39_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_39 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = var_1024)[name = tensor<string, []>("variance_39")];
            tensor<fp32, []> var_1027 = const()[name = tensor<string, []>("op_1027"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_1028 = add(x = variance_39, y = var_1027)[name = tensor<string, []>("op_1028")];
            tensor<fp32, []> var_1029_epsilon_0 = const()[name = tensor<string, []>("op_1029_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_1029 = rsqrt(epsilon = var_1029_epsilon_0, x = var_1028)[name = tensor<string, []>("op_1029")];
            tensor<fp32, [1, ?, 256]> hidden_states_139 = mul(x = hidden_states_135, y = var_1029)[name = tensor<string, []>("hidden_states_139")];
            tensor<fp32, [1, ?, 256]> input_177 = mul(x = encoder_block_9_layer_1_layer_norm_weight, y = hidden_states_139)[name = tensor<string, []>("input_177")];
            tensor<fp32, [1, ?, 1024]> input_179 = linear(bias = linear_4_bias_0, weight = encoder_block_9_layer_1_DenseReluDense_wi_0_weight, x = input_177)[name = tensor<string, []>("linear_67")];
            tensor<string, []> hidden_gelu_19_mode_0 = const()[name = tensor<string, []>("hidden_gelu_19_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp32, [1, ?, 1024]> hidden_gelu_19 = gelu(mode = hidden_gelu_19_mode_0, x = input_179)[name = tensor<string, []>("hidden_gelu_19")];
            tensor<fp32, [1, ?, 1024]> hidden_linear_19 = linear(bias = linear_4_bias_0, weight = encoder_block_9_layer_1_DenseReluDense_wi_1_weight, x = input_177)[name = tensor<string, []>("linear_68")];
            tensor<fp32, [1, ?, 1024]> input_181 = mul(x = hidden_gelu_19, y = hidden_linear_19)[name = tensor<string, []>("input_181")];
            tensor<fp32, [1, ?, 256]> input_185 = linear(bias = linear_3_bias_0, weight = encoder_block_9_layer_1_DenseReluDense_wo_weight, x = input_181)[name = tensor<string, []>("linear_69")];
            tensor<fp32, [1, ?, 256]> hidden_states_141 = add(x = hidden_states_135, y = input_185)[name = tensor<string, []>("hidden_states_141")];
            tensor<fp32, []> var_17_promoted_20 = const()[name = tensor<string, []>("op_17_promoted_20"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_1068 = pow(x = hidden_states_141, y = var_17_promoted_20)[name = tensor<string, []>("op_1068")];
            tensor<int32, [1]> variance_41_axes_0 = const()[name = tensor<string, []>("variance_41_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_41_keep_dims_0 = const()[name = tensor<string, []>("variance_41_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_41 = reduce_mean(axes = variance_41_axes_0, keep_dims = variance_41_keep_dims_0, x = var_1068)[name = tensor<string, []>("variance_41")];
            tensor<fp32, []> var_1071 = const()[name = tensor<string, []>("op_1071"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_1072 = add(x = variance_41, y = var_1071)[name = tensor<string, []>("op_1072")];
            tensor<fp32, []> var_1073_epsilon_0 = const()[name = tensor<string, []>("op_1073_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_1073 = rsqrt(epsilon = var_1073_epsilon_0, x = var_1072)[name = tensor<string, []>("op_1073")];
            tensor<fp32, [1, ?, 256]> hidden_states_145 = mul(x = hidden_states_141, y = var_1073)[name = tensor<string, []>("hidden_states_145")];
            tensor<fp32, [1, ?, 256]> hidden_states_147 = mul(x = encoder_block_10_layer_0_layer_norm_weight, y = hidden_states_145)[name = tensor<string, []>("hidden_states_147")];
            tensor<fp32, [1, ?, 384]> states_81 = linear(bias = linear_0_bias_0, weight = encoder_block_10_layer_0_SelfAttention_q_weight, x = hidden_states_147)[name = tensor<string, []>("linear_70")];
            tensor<int32, [4]> var_1086 = const()[name = tensor<string, []>("op_1086"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_1087 = reshape(shape = var_1086, x = states_81)[name = tensor<string, []>("op_1087")];
            tensor<fp32, [1, ?, 384]> states_83 = linear(bias = linear_0_bias_0, weight = encoder_block_10_layer_0_SelfAttention_k_weight, x = hidden_states_147)[name = tensor<string, []>("linear_71")];
            tensor<int32, [4]> var_1091 = const()[name = tensor<string, []>("op_1091"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_1092 = reshape(shape = var_1091, x = states_83)[name = tensor<string, []>("op_1092")];
            tensor<fp32, [1, ?, 384]> states_85 = linear(bias = linear_0_bias_0, weight = encoder_block_10_layer_0_SelfAttention_v_weight, x = hidden_states_147)[name = tensor<string, []>("linear_72")];
            tensor<int32, [4]> var_1096 = const()[name = tensor<string, []>("op_1096"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_1097 = reshape(shape = var_1096, x = states_85)[name = tensor<string, []>("op_1097")];
            tensor<int32, [4]> value_states_21_perm_0 = const()[name = tensor<string, []>("value_states_21_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<bool, []> scores_41_transpose_x_0 = const()[name = tensor<string, []>("scores_41_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> scores_41_transpose_y_0 = const()[name = tensor<string, []>("scores_41_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_56_perm_0 = const()[name = tensor<string, []>("transpose_56_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_57_perm_0 = const()[name = tensor<string, []>("transpose_57_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp32, [1, 6, 64, ?]> transpose_57 = transpose(perm = transpose_57_perm_0, x = var_1092)[name = tensor<string, []>("transpose_65")];
            tensor<fp32, [1, 6, ?, 64]> transpose_56 = transpose(perm = transpose_56_perm_0, x = var_1087)[name = tensor<string, []>("transpose_66")];
            tensor<fp32, [1, 6, ?, ?]> scores_41 = matmul(transpose_x = scores_41_transpose_x_0, transpose_y = scores_41_transpose_y_0, x = transpose_56, y = transpose_57)[name = tensor<string, []>("scores_41")];
            tensor<fp32, [1, 6, ?, ?]> scores_43 = add(x = scores_41, y = position_bias)[name = tensor<string, []>("scores_43")];
            tensor<fp32, [1, 6, ?, ?]> var_1103 = softmax(axis = var_21, x = scores_43)[name = tensor<string, []>("op_1103")];
            tensor<bool, []> states_87_transpose_x_0 = const()[name = tensor<string, []>("states_87_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> states_87_transpose_y_0 = const()[name = tensor<string, []>("states_87_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 6, ?, 64]> value_states_21 = transpose(perm = value_states_21_perm_0, x = var_1097)[name = tensor<string, []>("transpose_67")];
            tensor<fp32, [1, 6, ?, 64]> states_87 = matmul(transpose_x = states_87_transpose_x_0, transpose_y = states_87_transpose_y_0, x = var_1103, y = value_states_21)[name = tensor<string, []>("states_87")];
            tensor<int32, [4]> var_1107_perm_0 = const()[name = tensor<string, []>("op_1107_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1109 = const()[name = tensor<string, []>("op_1109"), val = tensor<int32, [3]>([1, -1, 384])];
            tensor<fp32, [1, ?, 6, 64]> var_1107 = transpose(perm = var_1107_perm_0, x = states_87)[name = tensor<string, []>("transpose_64")];
            tensor<fp32, [1, ?, 384]> input_191 = reshape(shape = var_1109, x = var_1107)[name = tensor<string, []>("input_191")];
            tensor<fp32, [1, ?, 256]> input_193 = linear(bias = linear_3_bias_0, weight = encoder_block_10_layer_0_SelfAttention_o_weight, x = input_191)[name = tensor<string, []>("linear_73")];
            tensor<fp32, [1, ?, 256]> hidden_states_149 = add(x = hidden_states_141, y = input_193)[name = tensor<string, []>("hidden_states_149")];
            tensor<fp32, []> var_17_promoted_21 = const()[name = tensor<string, []>("op_17_promoted_21"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_1119 = pow(x = hidden_states_149, y = var_17_promoted_21)[name = tensor<string, []>("op_1119")];
            tensor<int32, [1]> variance_43_axes_0 = const()[name = tensor<string, []>("variance_43_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_43_keep_dims_0 = const()[name = tensor<string, []>("variance_43_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_43 = reduce_mean(axes = variance_43_axes_0, keep_dims = variance_43_keep_dims_0, x = var_1119)[name = tensor<string, []>("variance_43")];
            tensor<fp32, []> var_1122 = const()[name = tensor<string, []>("op_1122"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_1123 = add(x = variance_43, y = var_1122)[name = tensor<string, []>("op_1123")];
            tensor<fp32, []> var_1124_epsilon_0 = const()[name = tensor<string, []>("op_1124_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_1124 = rsqrt(epsilon = var_1124_epsilon_0, x = var_1123)[name = tensor<string, []>("op_1124")];
            tensor<fp32, [1, ?, 256]> hidden_states_153 = mul(x = hidden_states_149, y = var_1124)[name = tensor<string, []>("hidden_states_153")];
            tensor<fp32, [1, ?, 256]> input_195 = mul(x = encoder_block_10_layer_1_layer_norm_weight, y = hidden_states_153)[name = tensor<string, []>("input_195")];
            tensor<fp32, [1, ?, 1024]> input_197 = linear(bias = linear_4_bias_0, weight = encoder_block_10_layer_1_DenseReluDense_wi_0_weight, x = input_195)[name = tensor<string, []>("linear_74")];
            tensor<string, []> hidden_gelu_21_mode_0 = const()[name = tensor<string, []>("hidden_gelu_21_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp32, [1, ?, 1024]> hidden_gelu_21 = gelu(mode = hidden_gelu_21_mode_0, x = input_197)[name = tensor<string, []>("hidden_gelu_21")];
            tensor<fp32, [1, ?, 1024]> hidden_linear_21 = linear(bias = linear_4_bias_0, weight = encoder_block_10_layer_1_DenseReluDense_wi_1_weight, x = input_195)[name = tensor<string, []>("linear_75")];
            tensor<fp32, [1, ?, 1024]> input_199 = mul(x = hidden_gelu_21, y = hidden_linear_21)[name = tensor<string, []>("input_199")];
            tensor<fp32, [1, ?, 256]> input_203 = linear(bias = linear_3_bias_0, weight = encoder_block_10_layer_1_DenseReluDense_wo_weight, x = input_199)[name = tensor<string, []>("linear_76")];
            tensor<fp32, [1, ?, 256]> hidden_states_155 = add(x = hidden_states_149, y = input_203)[name = tensor<string, []>("hidden_states_155")];
            tensor<fp32, []> var_17_promoted_22 = const()[name = tensor<string, []>("op_17_promoted_22"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_1163 = pow(x = hidden_states_155, y = var_17_promoted_22)[name = tensor<string, []>("op_1163")];
            tensor<int32, [1]> variance_45_axes_0 = const()[name = tensor<string, []>("variance_45_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_45_keep_dims_0 = const()[name = tensor<string, []>("variance_45_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_45 = reduce_mean(axes = variance_45_axes_0, keep_dims = variance_45_keep_dims_0, x = var_1163)[name = tensor<string, []>("variance_45")];
            tensor<fp32, []> var_1166 = const()[name = tensor<string, []>("op_1166"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_1167 = add(x = variance_45, y = var_1166)[name = tensor<string, []>("op_1167")];
            tensor<fp32, []> var_1168_epsilon_0 = const()[name = tensor<string, []>("op_1168_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_1168 = rsqrt(epsilon = var_1168_epsilon_0, x = var_1167)[name = tensor<string, []>("op_1168")];
            tensor<fp32, [1, ?, 256]> hidden_states_159 = mul(x = hidden_states_155, y = var_1168)[name = tensor<string, []>("hidden_states_159")];
            tensor<fp32, [1, ?, 256]> hidden_states_161 = mul(x = encoder_block_11_layer_0_layer_norm_weight, y = hidden_states_159)[name = tensor<string, []>("hidden_states_161")];
            tensor<fp32, [1, ?, 384]> states_89 = linear(bias = linear_0_bias_0, weight = encoder_block_11_layer_0_SelfAttention_q_weight, x = hidden_states_161)[name = tensor<string, []>("linear_77")];
            tensor<int32, [4]> var_1181 = const()[name = tensor<string, []>("op_1181"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_1182 = reshape(shape = var_1181, x = states_89)[name = tensor<string, []>("op_1182")];
            tensor<fp32, [1, ?, 384]> states_91 = linear(bias = linear_0_bias_0, weight = encoder_block_11_layer_0_SelfAttention_k_weight, x = hidden_states_161)[name = tensor<string, []>("linear_78")];
            tensor<int32, [4]> var_1186 = const()[name = tensor<string, []>("op_1186"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_1187 = reshape(shape = var_1186, x = states_91)[name = tensor<string, []>("op_1187")];
            tensor<fp32, [1, ?, 384]> states_93 = linear(bias = linear_0_bias_0, weight = encoder_block_11_layer_0_SelfAttention_v_weight, x = hidden_states_161)[name = tensor<string, []>("linear_79")];
            tensor<int32, [4]> var_1191 = const()[name = tensor<string, []>("op_1191"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp32, [1, ?, 6, 64]> var_1192 = reshape(shape = var_1191, x = states_93)[name = tensor<string, []>("op_1192")];
            tensor<int32, [4]> value_states_perm_0 = const()[name = tensor<string, []>("value_states_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<bool, []> scores_45_transpose_x_0 = const()[name = tensor<string, []>("scores_45_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> scores_45_transpose_y_0 = const()[name = tensor<string, []>("scores_45_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_58_perm_0 = const()[name = tensor<string, []>("transpose_58_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_59_perm_0 = const()[name = tensor<string, []>("transpose_59_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp32, [1, 6, 64, ?]> transpose_59 = transpose(perm = transpose_59_perm_0, x = var_1187)[name = tensor<string, []>("transpose_61")];
            tensor<fp32, [1, 6, ?, 64]> transpose_58 = transpose(perm = transpose_58_perm_0, x = var_1182)[name = tensor<string, []>("transpose_62")];
            tensor<fp32, [1, 6, ?, ?]> scores_45 = matmul(transpose_x = scores_45_transpose_x_0, transpose_y = scores_45_transpose_y_0, x = transpose_58, y = transpose_59)[name = tensor<string, []>("scores_45")];
            tensor<fp32, [1, 6, ?, ?]> scores = add(x = scores_45, y = position_bias)[name = tensor<string, []>("scores")];
            tensor<fp32, [1, 6, ?, ?]> var_1198 = softmax(axis = var_21, x = scores)[name = tensor<string, []>("op_1198")];
            tensor<bool, []> states_transpose_x_0 = const()[name = tensor<string, []>("states_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> states_transpose_y_0 = const()[name = tensor<string, []>("states_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 6, ?, 64]> value_states = transpose(perm = value_states_perm_0, x = var_1192)[name = tensor<string, []>("transpose_63")];
            tensor<fp32, [1, 6, ?, 64]> states = matmul(transpose_x = states_transpose_x_0, transpose_y = states_transpose_y_0, x = var_1198, y = value_states)[name = tensor<string, []>("states")];
            tensor<int32, [4]> var_1202_perm_0 = const()[name = tensor<string, []>("op_1202_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1204 = const()[name = tensor<string, []>("op_1204"), val = tensor<int32, [3]>([1, -1, 384])];
            tensor<fp32, [1, ?, 6, 64]> var_1202 = transpose(perm = var_1202_perm_0, x = states)[name = tensor<string, []>("transpose_60")];
            tensor<fp32, [1, ?, 384]> input_209 = reshape(shape = var_1204, x = var_1202)[name = tensor<string, []>("input_209")];
            tensor<fp32, [1, ?, 256]> input_211 = linear(bias = linear_3_bias_0, weight = encoder_block_11_layer_0_SelfAttention_o_weight, x = input_209)[name = tensor<string, []>("linear_80")];
            tensor<fp32, [1, ?, 256]> hidden_states_163 = add(x = hidden_states_155, y = input_211)[name = tensor<string, []>("hidden_states_163")];
            tensor<fp32, []> var_17_promoted_23 = const()[name = tensor<string, []>("op_17_promoted_23"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_1214 = pow(x = hidden_states_163, y = var_17_promoted_23)[name = tensor<string, []>("op_1214")];
            tensor<int32, [1]> variance_47_axes_0 = const()[name = tensor<string, []>("variance_47_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_47_keep_dims_0 = const()[name = tensor<string, []>("variance_47_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance_47 = reduce_mean(axes = variance_47_axes_0, keep_dims = variance_47_keep_dims_0, x = var_1214)[name = tensor<string, []>("variance_47")];
            tensor<fp32, []> var_1217 = const()[name = tensor<string, []>("op_1217"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_1218 = add(x = variance_47, y = var_1217)[name = tensor<string, []>("op_1218")];
            tensor<fp32, []> var_1219_epsilon_0 = const()[name = tensor<string, []>("op_1219_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_1219 = rsqrt(epsilon = var_1219_epsilon_0, x = var_1218)[name = tensor<string, []>("op_1219")];
            tensor<fp32, [1, ?, 256]> hidden_states_167 = mul(x = hidden_states_163, y = var_1219)[name = tensor<string, []>("hidden_states_167")];
            tensor<fp32, [1, ?, 256]> input_213 = mul(x = encoder_block_11_layer_1_layer_norm_weight, y = hidden_states_167)[name = tensor<string, []>("input_213")];
            tensor<fp32, [1, ?, 1024]> input_215 = linear(bias = linear_4_bias_0, weight = encoder_block_11_layer_1_DenseReluDense_wi_0_weight, x = input_213)[name = tensor<string, []>("linear_81")];
            tensor<string, []> hidden_gelu_mode_0 = const()[name = tensor<string, []>("hidden_gelu_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp32, [1, ?, 1024]> hidden_gelu = gelu(mode = hidden_gelu_mode_0, x = input_215)[name = tensor<string, []>("hidden_gelu")];
            tensor<fp32, [1, ?, 1024]> hidden_linear = linear(bias = linear_4_bias_0, weight = encoder_block_11_layer_1_DenseReluDense_wi_1_weight, x = input_213)[name = tensor<string, []>("linear_82")];
            tensor<fp32, [1, ?, 1024]> input_217 = mul(x = hidden_gelu, y = hidden_linear)[name = tensor<string, []>("input_217")];
            tensor<fp32, [1, ?, 256]> input_221 = linear(bias = linear_3_bias_0, weight = encoder_block_11_layer_1_DenseReluDense_wo_weight, x = input_217)[name = tensor<string, []>("linear_83")];
            tensor<fp32, [1, ?, 256]> hidden_states_169 = add(x = hidden_states_163, y = input_221)[name = tensor<string, []>("hidden_states_169")];
            tensor<fp32, []> var_17_promoted_24 = const()[name = tensor<string, []>("op_17_promoted_24"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, ?, 256]> var_1252 = pow(x = hidden_states_169, y = var_17_promoted_24)[name = tensor<string, []>("op_1252")];
            tensor<int32, [1]> variance_axes_0 = const()[name = tensor<string, []>("variance_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_keep_dims_0 = const()[name = tensor<string, []>("variance_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, ?, 1]> variance = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = var_1252)[name = tensor<string, []>("variance")];
            tensor<fp32, []> var_1255 = const()[name = tensor<string, []>("op_1255"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, ?, 1]> var_1256 = add(x = variance, y = var_1255)[name = tensor<string, []>("op_1256")];
            tensor<fp32, []> var_1257_epsilon_0 = const()[name = tensor<string, []>("op_1257_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, ?, 1]> var_1257 = rsqrt(epsilon = var_1257_epsilon_0, x = var_1256)[name = tensor<string, []>("op_1257")];
            tensor<fp32, [1, ?, 256]> hidden_states = mul(x = hidden_states_169, y = var_1257)[name = tensor<string, []>("hidden_states")];
            tensor<fp32, [1, ?, 256]> last_hidden_state = mul(x = encoder_final_layer_norm_weight, y = hidden_states)[name = tensor<string, []>("input")];
        } -> (last_hidden_state);
}