File size: 153,849 Bytes
cc7b0cd | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 | program(1.0)
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.7.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
{
func main<ios17>(tensor<int32, [1, ?]> attention_mask, tensor<int32, [1, ?]> input_ids) [FlexibleShapeInformation = tuple<tuple<tensor<string, []>, dict<tensor<string, []>, tensor<int32, [?]>>>, tuple<tensor<string, []>, dict<tensor<string, []>, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"attention_mask", [1, 16]}, {"input_ids", [1, 16]}}), ("RangeDims", {{"attention_mask", [[1, 1], [1, 64]]}, {"input_ids", [[1, 1], [1, 64]]}})))] {
tensor<fp32, [384, 256]> encoder_embed_tokens_weight = const()[name = tensor<string, []>("encoder_embed_tokens_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
tensor<fp32, [256]> encoder_block_0_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_0_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(393344)))];
tensor<fp32, [384, 256]> encoder_block_0_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_0_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(394432)))];
tensor<fp32, [384, 256]> encoder_block_0_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_0_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(787712)))];
tensor<fp32, [384, 256]> encoder_block_0_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_0_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1180992)))];
tensor<fp32, [32, 6]> encoder_block_0_layer_0_SelfAttention_relative_attention_bias_weight = const()[name = tensor<string, []>("encoder_block_0_layer_0_SelfAttention_relative_attention_bias_weight"), val = tensor<fp32, [32, 6]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1574272)))];
tensor<fp32, [256, 384]> encoder_block_0_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_0_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1575104)))];
tensor<fp32, [256]> encoder_block_0_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_0_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1968384)))];
tensor<fp32, [1024, 256]> encoder_block_0_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_0_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1969472)))];
tensor<fp32, [1024, 256]> encoder_block_0_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_0_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3018112)))];
tensor<fp32, [256, 1024]> encoder_block_0_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_0_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4066752)))];
tensor<fp32, [256]> encoder_block_1_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_1_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5115392)))];
tensor<fp32, [384, 256]> encoder_block_1_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_1_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5116480)))];
tensor<fp32, [384, 256]> encoder_block_1_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_1_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5509760)))];
tensor<fp32, [384, 256]> encoder_block_1_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_1_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5903040)))];
tensor<fp32, [256, 384]> encoder_block_1_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_1_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6296320)))];
tensor<fp32, [256]> encoder_block_1_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_1_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6689600)))];
tensor<fp32, [1024, 256]> encoder_block_1_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_1_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6690688)))];
tensor<fp32, [1024, 256]> encoder_block_1_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_1_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7739328)))];
tensor<fp32, [256, 1024]> encoder_block_1_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_1_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8787968)))];
tensor<fp32, [256]> encoder_block_2_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_2_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9836608)))];
tensor<fp32, [384, 256]> encoder_block_2_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_2_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9837696)))];
tensor<fp32, [384, 256]> encoder_block_2_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_2_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10230976)))];
tensor<fp32, [384, 256]> encoder_block_2_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_2_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10624256)))];
tensor<fp32, [256, 384]> encoder_block_2_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_2_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11017536)))];
tensor<fp32, [256]> encoder_block_2_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_2_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11410816)))];
tensor<fp32, [1024, 256]> encoder_block_2_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_2_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11411904)))];
tensor<fp32, [1024, 256]> encoder_block_2_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_2_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12460544)))];
tensor<fp32, [256, 1024]> encoder_block_2_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_2_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13509184)))];
tensor<fp32, [256]> encoder_block_3_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_3_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14557824)))];
tensor<fp32, [384, 256]> encoder_block_3_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_3_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14558912)))];
tensor<fp32, [384, 256]> encoder_block_3_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_3_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14952192)))];
tensor<fp32, [384, 256]> encoder_block_3_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_3_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15345472)))];
tensor<fp32, [256, 384]> encoder_block_3_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_3_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15738752)))];
tensor<fp32, [256]> encoder_block_3_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_3_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16132032)))];
tensor<fp32, [1024, 256]> encoder_block_3_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_3_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16133120)))];
tensor<fp32, [1024, 256]> encoder_block_3_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_3_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17181760)))];
tensor<fp32, [256, 1024]> encoder_block_3_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_3_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18230400)))];
tensor<fp32, [256]> encoder_block_4_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_4_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(19279040)))];
tensor<fp32, [384, 256]> encoder_block_4_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_4_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(19280128)))];
tensor<fp32, [384, 256]> encoder_block_4_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_4_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(19673408)))];
tensor<fp32, [384, 256]> encoder_block_4_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_4_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20066688)))];
tensor<fp32, [256, 384]> encoder_block_4_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_4_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20459968)))];
tensor<fp32, [256]> encoder_block_4_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_4_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20853248)))];
tensor<fp32, [1024, 256]> encoder_block_4_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_4_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20854336)))];
tensor<fp32, [1024, 256]> encoder_block_4_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_4_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21902976)))];
tensor<fp32, [256, 1024]> encoder_block_4_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_4_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22951616)))];
tensor<fp32, [256]> encoder_block_5_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_5_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24000256)))];
tensor<fp32, [384, 256]> encoder_block_5_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_5_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24001344)))];
tensor<fp32, [384, 256]> encoder_block_5_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_5_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24394624)))];
tensor<fp32, [384, 256]> encoder_block_5_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_5_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24787904)))];
tensor<fp32, [256, 384]> encoder_block_5_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_5_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25181184)))];
tensor<fp32, [256]> encoder_block_5_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_5_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25574464)))];
tensor<fp32, [1024, 256]> encoder_block_5_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_5_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25575552)))];
tensor<fp32, [1024, 256]> encoder_block_5_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_5_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26624192)))];
tensor<fp32, [256, 1024]> encoder_block_5_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_5_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27672832)))];
tensor<fp32, [256]> encoder_block_6_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_6_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28721472)))];
tensor<fp32, [384, 256]> encoder_block_6_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_6_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28722560)))];
tensor<fp32, [384, 256]> encoder_block_6_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_6_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29115840)))];
tensor<fp32, [384, 256]> encoder_block_6_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_6_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29509120)))];
tensor<fp32, [256, 384]> encoder_block_6_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_6_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29902400)))];
tensor<fp32, [256]> encoder_block_6_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_6_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30295680)))];
tensor<fp32, [1024, 256]> encoder_block_6_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_6_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30296768)))];
tensor<fp32, [1024, 256]> encoder_block_6_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_6_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31345408)))];
tensor<fp32, [256, 1024]> encoder_block_6_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_6_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32394048)))];
tensor<fp32, [256]> encoder_block_7_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_7_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33442688)))];
tensor<fp32, [384, 256]> encoder_block_7_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_7_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33443776)))];
tensor<fp32, [384, 256]> encoder_block_7_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_7_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33837056)))];
tensor<fp32, [384, 256]> encoder_block_7_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_7_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34230336)))];
tensor<fp32, [256, 384]> encoder_block_7_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_7_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34623616)))];
tensor<fp32, [256]> encoder_block_7_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_7_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35016896)))];
tensor<fp32, [1024, 256]> encoder_block_7_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_7_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35017984)))];
tensor<fp32, [1024, 256]> encoder_block_7_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_7_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36066624)))];
tensor<fp32, [256, 1024]> encoder_block_7_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_7_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37115264)))];
tensor<fp32, [256]> encoder_block_8_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_8_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38163904)))];
tensor<fp32, [384, 256]> encoder_block_8_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_8_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38164992)))];
tensor<fp32, [384, 256]> encoder_block_8_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_8_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38558272)))];
tensor<fp32, [384, 256]> encoder_block_8_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_8_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38951552)))];
tensor<fp32, [256, 384]> encoder_block_8_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_8_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39344832)))];
tensor<fp32, [256]> encoder_block_8_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_8_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39738112)))];
tensor<fp32, [1024, 256]> encoder_block_8_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_8_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39739200)))];
tensor<fp32, [1024, 256]> encoder_block_8_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_8_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40787840)))];
tensor<fp32, [256, 1024]> encoder_block_8_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_8_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41836480)))];
tensor<fp32, [256]> encoder_block_9_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_9_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42885120)))];
tensor<fp32, [384, 256]> encoder_block_9_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_9_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42886208)))];
tensor<fp32, [384, 256]> encoder_block_9_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_9_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43279488)))];
tensor<fp32, [384, 256]> encoder_block_9_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_9_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43672768)))];
tensor<fp32, [256, 384]> encoder_block_9_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_9_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44066048)))];
tensor<fp32, [256]> encoder_block_9_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_9_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44459328)))];
tensor<fp32, [1024, 256]> encoder_block_9_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_9_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44460416)))];
tensor<fp32, [1024, 256]> encoder_block_9_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_9_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45509056)))];
tensor<fp32, [256, 1024]> encoder_block_9_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_9_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46557696)))];
tensor<fp32, [256]> encoder_block_10_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_10_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47606336)))];
tensor<fp32, [384, 256]> encoder_block_10_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_10_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47607424)))];
tensor<fp32, [384, 256]> encoder_block_10_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_10_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48000704)))];
tensor<fp32, [384, 256]> encoder_block_10_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_10_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48393984)))];
tensor<fp32, [256, 384]> encoder_block_10_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_10_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48787264)))];
tensor<fp32, [256]> encoder_block_10_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_10_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49180544)))];
tensor<fp32, [1024, 256]> encoder_block_10_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_10_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49181632)))];
tensor<fp32, [1024, 256]> encoder_block_10_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_10_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50230272)))];
tensor<fp32, [256, 1024]> encoder_block_10_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_10_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51278912)))];
tensor<fp32, [256]> encoder_block_11_layer_0_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_11_layer_0_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52327552)))];
tensor<fp32, [384, 256]> encoder_block_11_layer_0_SelfAttention_q_weight = const()[name = tensor<string, []>("encoder_block_11_layer_0_SelfAttention_q_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52328640)))];
tensor<fp32, [384, 256]> encoder_block_11_layer_0_SelfAttention_k_weight = const()[name = tensor<string, []>("encoder_block_11_layer_0_SelfAttention_k_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52721920)))];
tensor<fp32, [384, 256]> encoder_block_11_layer_0_SelfAttention_v_weight = const()[name = tensor<string, []>("encoder_block_11_layer_0_SelfAttention_v_weight"), val = tensor<fp32, [384, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53115200)))];
tensor<fp32, [256, 384]> encoder_block_11_layer_0_SelfAttention_o_weight = const()[name = tensor<string, []>("encoder_block_11_layer_0_SelfAttention_o_weight"), val = tensor<fp32, [256, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53508480)))];
tensor<fp32, [256]> encoder_block_11_layer_1_layer_norm_weight = const()[name = tensor<string, []>("encoder_block_11_layer_1_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53901760)))];
tensor<fp32, [1024, 256]> encoder_block_11_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor<string, []>("encoder_block_11_layer_1_DenseReluDense_wi_0_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53902848)))];
tensor<fp32, [1024, 256]> encoder_block_11_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor<string, []>("encoder_block_11_layer_1_DenseReluDense_wi_1_weight"), val = tensor<fp32, [1024, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54951488)))];
tensor<fp32, [256, 1024]> encoder_block_11_layer_1_DenseReluDense_wo_weight = const()[name = tensor<string, []>("encoder_block_11_layer_1_DenseReluDense_wo_weight"), val = tensor<fp32, [256, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56000128)))];
tensor<fp32, [256]> encoder_final_layer_norm_weight = const()[name = tensor<string, []>("encoder_final_layer_norm_weight"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57048768)))];
tensor<int32, []> var_7 = const()[name = tensor<string, []>("op_7"), val = tensor<int32, []>(8)];
tensor<fp32, []> var_13 = const()[name = tensor<string, []>("op_13"), val = tensor<fp32, []>(0x1p+0)];
tensor<int32, []> var_19 = const()[name = tensor<string, []>("op_19"), val = tensor<int32, []>(0)];
tensor<int32, []> var_21 = const()[name = tensor<string, []>("op_21"), val = tensor<int32, []>(-1)];
tensor<int32, []> input_3_batch_dims_0 = const()[name = tensor<string, []>("input_3_batch_dims_0"), val = tensor<int32, []>(0)];
tensor<bool, []> input_3_validate_indices_0 = const()[name = tensor<string, []>("input_3_validate_indices_0"), val = tensor<bool, []>(false)];
tensor<int32, []> greater_equal_1_y_0 = const()[name = tensor<string, []>("greater_equal_1_y_0"), val = tensor<int32, []>(0)];
tensor<bool, [1, ?]> greater_equal_1 = greater_equal(x = input_ids, y = greater_equal_1_y_0)[name = tensor<string, []>("greater_equal_1")];
tensor<int32, []> slice_by_index_1 = const()[name = tensor<string, []>("slice_by_index_1"), val = tensor<int32, []>(384)];
tensor<int32, [1, ?]> add_1 = add(x = input_ids, y = slice_by_index_1)[name = tensor<string, []>("add_1")];
tensor<int32, [1, ?]> select_1 = select(a = input_ids, b = add_1, cond = greater_equal_1)[name = tensor<string, []>("select_1")];
tensor<int32, []> input_3_axis_1 = const()[name = tensor<string, []>("input_3_axis_1"), val = tensor<int32, []>(0)];
tensor<fp32, [1, ?, 256]> input_3 = gather(axis = input_3_axis_1, batch_dims = input_3_batch_dims_0, indices = select_1, validate_indices = input_3_validate_indices_0, x = encoder_embed_tokens_weight)[name = tensor<string, []>("input_3")];
tensor<int32, [1]> var_55_axes_0 = const()[name = tensor<string, []>("op_55_axes_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [1, 1, ?]> var_55 = expand_dims(axes = var_55_axes_0, x = attention_mask)[name = tensor<string, []>("op_55")];
tensor<int32, [1]> var_56_axes_0 = const()[name = tensor<string, []>("op_56_axes_0"), val = tensor<int32, [1]>([2])];
tensor<int32, [1, 1, 1, ?]> var_56 = expand_dims(axes = var_56_axes_0, x = var_55)[name = tensor<string, []>("op_56")];
tensor<string, []> var_58_dtype_0 = const()[name = tensor<string, []>("op_58_dtype_0"), val = tensor<string, []>("fp32")];
tensor<fp32, [1, 1, 1, ?]> var_58 = cast(dtype = var_58_dtype_0, x = var_56)[name = tensor<string, []>("cast_58")];
tensor<fp32, [1, 1, 1, ?]> var_59 = sub(x = var_13, y = var_58)[name = tensor<string, []>("op_59")];
tensor<fp32, []> var_60 = const()[name = tensor<string, []>("op_60"), val = tensor<fp32, []>(-0x1.fffffep+127)];
tensor<fp32, [1, 1, 1, ?]> mask = mul(x = var_59, y = var_60)[name = tensor<string, []>("mask")];
tensor<fp32, []> var_17_promoted = const()[name = tensor<string, []>("op_17_promoted"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_71 = pow(x = input_3, y = var_17_promoted)[name = tensor<string, []>("op_71")];
tensor<int32, [1]> variance_1_axes_0 = const()[name = tensor<string, []>("variance_1_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_1_keep_dims_0 = const()[name = tensor<string, []>("variance_1_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_1 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = var_71)[name = tensor<string, []>("variance_1")];
tensor<fp32, []> var_74 = const()[name = tensor<string, []>("op_74"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_75 = add(x = variance_1, y = var_74)[name = tensor<string, []>("op_75")];
tensor<fp32, []> var_76_epsilon_0 = const()[name = tensor<string, []>("op_76_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_76 = rsqrt(epsilon = var_76_epsilon_0, x = var_75)[name = tensor<string, []>("op_76")];
tensor<fp32, [1, ?, 256]> hidden_states_5 = mul(x = input_3, y = var_76)[name = tensor<string, []>("hidden_states_5")];
tensor<fp32, [1, ?, 256]> hidden_states_7 = mul(x = encoder_block_0_layer_0_layer_norm_weight, y = hidden_states_5)[name = tensor<string, []>("hidden_states_7")];
tensor<int32, [3]> var_88_shape = shape(x = hidden_states_7)[name = tensor<string, []>("op_88_shape")];
tensor<int32, []> gather_2_batch_dims_0 = const()[name = tensor<string, []>("gather_2_batch_dims_0"), val = tensor<int32, []>(0)];
tensor<bool, []> gather_2_validate_indices_0 = const()[name = tensor<string, []>("gather_2_validate_indices_0"), val = tensor<bool, []>(false)];
tensor<int32, []> select_2 = const()[name = tensor<string, []>("select_2"), val = tensor<int32, []>(1)];
tensor<int32, []> gather_2_axis_1 = const()[name = tensor<string, []>("gather_2_axis_1"), val = tensor<int32, []>(0)];
tensor<int32, []> gather_2 = gather(axis = gather_2_axis_1, batch_dims = gather_2_batch_dims_0, indices = select_2, validate_indices = gather_2_validate_indices_0, x = var_88_shape)[name = tensor<string, []>("gather_2")];
tensor<fp32, [384]> linear_0_bias_0 = const()[name = tensor<string, []>("linear_0_bias_0"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57049856)))];
tensor<fp32, [1, ?, 384]> states_1 = linear(bias = linear_0_bias_0, weight = encoder_block_0_layer_0_SelfAttention_q_weight, x = hidden_states_7)[name = tensor<string, []>("linear_0")];
tensor<int32, [4]> var_91 = const()[name = tensor<string, []>("op_91"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_92 = reshape(shape = var_91, x = states_1)[name = tensor<string, []>("op_92")];
tensor<fp32, [1, ?, 384]> states_3 = linear(bias = linear_0_bias_0, weight = encoder_block_0_layer_0_SelfAttention_k_weight, x = hidden_states_7)[name = tensor<string, []>("linear_1")];
tensor<int32, [4]> var_96 = const()[name = tensor<string, []>("op_96"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_97 = reshape(shape = var_96, x = states_3)[name = tensor<string, []>("op_97")];
tensor<fp32, [1, ?, 384]> states_5 = linear(bias = linear_0_bias_0, weight = encoder_block_0_layer_0_SelfAttention_v_weight, x = hidden_states_7)[name = tensor<string, []>("linear_2")];
tensor<int32, [4]> var_101 = const()[name = tensor<string, []>("op_101"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_102 = reshape(shape = var_101, x = states_5)[name = tensor<string, []>("op_102")];
tensor<int32, [4]> value_states_1_perm_0 = const()[name = tensor<string, []>("value_states_1_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> scores_1_transpose_x_0 = const()[name = tensor<string, []>("scores_1_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_1_transpose_y_0 = const()[name = tensor<string, []>("scores_1_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_36_perm_0 = const()[name = tensor<string, []>("transpose_36_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_37_perm_0 = const()[name = tensor<string, []>("transpose_37_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp32, [1, 6, 64, ?]> transpose_37 = transpose(perm = transpose_37_perm_0, x = var_97)[name = tensor<string, []>("transpose_106")];
tensor<fp32, [1, 6, ?, 64]> transpose_36 = transpose(perm = transpose_36_perm_0, x = var_92)[name = tensor<string, []>("transpose_107")];
tensor<fp32, [1, 6, ?, ?]> scores_1 = matmul(transpose_x = scores_1_transpose_x_0, transpose_y = scores_1_transpose_y_0, x = transpose_36, y = transpose_37)[name = tensor<string, []>("scores_1")];
tensor<int32, []> const_0 = const()[name = tensor<string, []>("const_0"), val = tensor<int32, []>(0)];
tensor<int32, []> const_1 = const()[name = tensor<string, []>("const_1"), val = tensor<int32, []>(1)];
tensor<int32, [?]> var_106 = range_1d(end = gather_2, start = const_0, step = const_1)[name = tensor<string, []>("op_106")];
tensor<int32, [1]> context_position_axes_0 = const()[name = tensor<string, []>("context_position_axes_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [?, 1]> context_position = expand_dims(axes = context_position_axes_0, x = var_106)[name = tensor<string, []>("context_position")];
tensor<int32, [1]> var_110_axes_0 = const()[name = tensor<string, []>("op_110_axes_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1, ?]> var_110 = expand_dims(axes = var_110_axes_0, x = var_106)[name = tensor<string, []>("op_110")];
tensor<int32, [?, ?]> relative_position_1 = sub(x = var_110, y = context_position)[name = tensor<string, []>("relative_position_1")];
tensor<bool, [?, ?]> var_113 = greater(x = relative_position_1, y = var_19)[name = tensor<string, []>("op_113")];
tensor<string, []> var_114_dtype_0 = const()[name = tensor<string, []>("op_114_dtype_0"), val = tensor<string, []>("int32")];
tensor<int32, []> var_115 = const()[name = tensor<string, []>("op_115"), val = tensor<int32, []>(16)];
tensor<int32, [?, ?]> var_114 = cast(dtype = var_114_dtype_0, x = var_113)[name = tensor<string, []>("cast_57")];
tensor<int32, [?, ?]> var_116 = mul(x = var_114, y = var_115)[name = tensor<string, []>("op_116")];
tensor<int32, [?, ?]> relative_position = abs(x = relative_position_1)[name = tensor<string, []>("relative_position")];
tensor<bool, [?, ?]> is_small = less(x = relative_position, y = var_7)[name = tensor<string, []>("is_small")];
tensor<string, []> var_121_dtype_0 = const()[name = tensor<string, []>("op_121_dtype_0"), val = tensor<string, []>("fp32")];
tensor<fp32, []> _inversed_123_y_0 = const()[name = tensor<string, []>("_inversed_123_y_0"), val = tensor<fp32, []>(0x1p-3)];
tensor<fp32, [?, ?]> var_121 = cast(dtype = var_121_dtype_0, x = relative_position)[name = tensor<string, []>("cast_56")];
tensor<fp32, [?, ?]> _inversed_123 = mul(x = var_121, y = _inversed_123_y_0)[name = tensor<string, []>("_inversed_123")];
tensor<fp32, []> var_124_epsilon_0 = const()[name = tensor<string, []>("op_124_epsilon_0"), val = tensor<fp32, []>(0x1p-149)];
tensor<fp32, [?, ?]> var_124 = log(epsilon = var_124_epsilon_0, x = _inversed_123)[name = tensor<string, []>("op_124")];
tensor<fp32, []> _inversed_126_y_0 = const()[name = tensor<string, []>("_inversed_126_y_0"), val = tensor<fp32, []>(0x1.715476p-2)];
tensor<fp32, [?, ?]> _inversed_126 = mul(x = var_124, y = _inversed_126_y_0)[name = tensor<string, []>("_inversed_126")];
tensor<fp32, []> var_127_promoted = const()[name = tensor<string, []>("op_127_promoted"), val = tensor<fp32, []>(0x1p+3)];
tensor<fp32, [?, ?]> var_128 = mul(x = _inversed_126, y = var_127_promoted)[name = tensor<string, []>("op_128")];
tensor<string, []> var_129_dtype_0 = const()[name = tensor<string, []>("op_129_dtype_0"), val = tensor<string, []>("int32")];
tensor<int32, []> var_130 = const()[name = tensor<string, []>("op_130"), val = tensor<int32, []>(8)];
tensor<int32, [?, ?]> var_129 = cast(dtype = var_129_dtype_0, x = var_128)[name = tensor<string, []>("cast_55")];
tensor<int32, [?, ?]> relative_position_if_large_1 = add(x = var_129, y = var_130)[name = tensor<string, []>("relative_position_if_large_1")];
tensor<int32, []> var_132_value_0 = const()[name = tensor<string, []>("op_132_value_0"), val = tensor<int32, []>(15)];
tensor<int32, [?, ?]> var_132 = fill_like(ref_tensor = relative_position_if_large_1, value = var_132_value_0)[name = tensor<string, []>("op_132")];
tensor<int32, [?, ?]> relative_position_if_large = minimum(x = relative_position_if_large_1, y = var_132)[name = tensor<string, []>("relative_position_if_large")];
tensor<int32, [?, ?]> var_134 = select(a = relative_position, b = relative_position_if_large, cond = is_small)[name = tensor<string, []>("op_134")];
tensor<int32, [?, ?]> input_5 = add(x = var_116, y = var_134)[name = tensor<string, []>("input_5")];
tensor<int32, []> values_batch_dims_0 = const()[name = tensor<string, []>("values_batch_dims_0"), val = tensor<int32, []>(0)];
tensor<bool, []> values_validate_indices_0 = const()[name = tensor<string, []>("values_validate_indices_0"), val = tensor<bool, []>(false)];
tensor<int32, []> greater_equal_3_y_0 = const()[name = tensor<string, []>("greater_equal_3_y_0"), val = tensor<int32, []>(0)];
tensor<bool, [?, ?]> greater_equal_3 = greater_equal(x = input_5, y = greater_equal_3_y_0)[name = tensor<string, []>("greater_equal_3")];
tensor<int32, []> slice_by_index_3 = const()[name = tensor<string, []>("slice_by_index_3"), val = tensor<int32, []>(32)];
tensor<int32, [?, ?]> add_3 = add(x = input_5, y = slice_by_index_3)[name = tensor<string, []>("add_3")];
tensor<int32, [?, ?]> select_3 = select(a = input_5, b = add_3, cond = greater_equal_3)[name = tensor<string, []>("select_3")];
tensor<int32, []> values_axis_1 = const()[name = tensor<string, []>("values_axis_1"), val = tensor<int32, []>(0)];
tensor<fp32, [?, ?, 6]> values = gather(axis = values_axis_1, batch_dims = values_batch_dims_0, indices = select_3, validate_indices = values_validate_indices_0, x = encoder_block_0_layer_0_SelfAttention_relative_attention_bias_weight)[name = tensor<string, []>("values")];
tensor<int32, [3]> var_138 = const()[name = tensor<string, []>("op_138"), val = tensor<int32, [3]>([2, 0, 1])];
tensor<int32, [1]> position_bias_1_axes_0 = const()[name = tensor<string, []>("position_bias_1_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp32, [6, ?, ?]> var_139 = transpose(perm = var_138, x = values)[name = tensor<string, []>("transpose_105")];
tensor<fp32, [1, 6, ?, ?]> position_bias_1 = expand_dims(axes = position_bias_1_axes_0, x = var_139)[name = tensor<string, []>("position_bias_1")];
tensor<fp32, [1, 6, ?, ?]> position_bias = add(x = position_bias_1, y = mask)[name = tensor<string, []>("position_bias")];
tensor<fp32, [1, 6, ?, ?]> scores_3 = add(x = scores_1, y = position_bias)[name = tensor<string, []>("scores_3")];
tensor<fp32, [1, 6, ?, ?]> var_144 = softmax(axis = var_21, x = scores_3)[name = tensor<string, []>("op_144")];
tensor<bool, []> states_7_transpose_x_0 = const()[name = tensor<string, []>("states_7_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> states_7_transpose_y_0 = const()[name = tensor<string, []>("states_7_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp32, [1, 6, ?, 64]> value_states_1 = transpose(perm = value_states_1_perm_0, x = var_102)[name = tensor<string, []>("transpose_108")];
tensor<fp32, [1, 6, ?, 64]> states_7 = matmul(transpose_x = states_7_transpose_x_0, transpose_y = states_7_transpose_y_0, x = var_144, y = value_states_1)[name = tensor<string, []>("states_7")];
tensor<int32, [4]> var_148_perm_0 = const()[name = tensor<string, []>("op_148_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_150 = const()[name = tensor<string, []>("op_150"), val = tensor<int32, [3]>([1, -1, 384])];
tensor<fp32, [1, ?, 6, 64]> var_148 = transpose(perm = var_148_perm_0, x = states_7)[name = tensor<string, []>("transpose_104")];
tensor<fp32, [1, ?, 384]> input_11 = reshape(shape = var_150, x = var_148)[name = tensor<string, []>("input_11")];
tensor<fp32, [256]> linear_3_bias_0 = const()[name = tensor<string, []>("linear_3_bias_0"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57051456)))];
tensor<fp32, [1, ?, 256]> input_13 = linear(bias = linear_3_bias_0, weight = encoder_block_0_layer_0_SelfAttention_o_weight, x = input_11)[name = tensor<string, []>("linear_3")];
tensor<fp32, [1, ?, 256]> hidden_states_9 = add(x = input_3, y = input_13)[name = tensor<string, []>("hidden_states_9")];
tensor<fp32, []> var_17_promoted_1 = const()[name = tensor<string, []>("op_17_promoted_1"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_166 = pow(x = hidden_states_9, y = var_17_promoted_1)[name = tensor<string, []>("op_166")];
tensor<int32, [1]> variance_3_axes_0 = const()[name = tensor<string, []>("variance_3_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_3_keep_dims_0 = const()[name = tensor<string, []>("variance_3_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_3 = reduce_mean(axes = variance_3_axes_0, keep_dims = variance_3_keep_dims_0, x = var_166)[name = tensor<string, []>("variance_3")];
tensor<fp32, []> var_169 = const()[name = tensor<string, []>("op_169"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_170 = add(x = variance_3, y = var_169)[name = tensor<string, []>("op_170")];
tensor<fp32, []> var_171_epsilon_0 = const()[name = tensor<string, []>("op_171_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_171 = rsqrt(epsilon = var_171_epsilon_0, x = var_170)[name = tensor<string, []>("op_171")];
tensor<fp32, [1, ?, 256]> hidden_states_13 = mul(x = hidden_states_9, y = var_171)[name = tensor<string, []>("hidden_states_13")];
tensor<fp32, [1, ?, 256]> input_15 = mul(x = encoder_block_0_layer_1_layer_norm_weight, y = hidden_states_13)[name = tensor<string, []>("input_15")];
tensor<fp32, [1024]> linear_4_bias_0 = const()[name = tensor<string, []>("linear_4_bias_0"), val = tensor<fp32, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57052544)))];
tensor<fp32, [1, ?, 1024]> input_17 = linear(bias = linear_4_bias_0, weight = encoder_block_0_layer_1_DenseReluDense_wi_0_weight, x = input_15)[name = tensor<string, []>("linear_4")];
tensor<string, []> hidden_gelu_1_mode_0 = const()[name = tensor<string, []>("hidden_gelu_1_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp32, [1, ?, 1024]> hidden_gelu_1 = gelu(mode = hidden_gelu_1_mode_0, x = input_17)[name = tensor<string, []>("hidden_gelu_1")];
tensor<fp32, [1, ?, 1024]> hidden_linear_1 = linear(bias = linear_4_bias_0, weight = encoder_block_0_layer_1_DenseReluDense_wi_1_weight, x = input_15)[name = tensor<string, []>("linear_5")];
tensor<fp32, [1, ?, 1024]> input_19 = mul(x = hidden_gelu_1, y = hidden_linear_1)[name = tensor<string, []>("input_19")];
tensor<fp32, [1, ?, 256]> input_23 = linear(bias = linear_3_bias_0, weight = encoder_block_0_layer_1_DenseReluDense_wo_weight, x = input_19)[name = tensor<string, []>("linear_6")];
tensor<fp32, [1, ?, 256]> hidden_states_15 = add(x = hidden_states_9, y = input_23)[name = tensor<string, []>("hidden_states_15")];
tensor<fp32, []> var_17_promoted_2 = const()[name = tensor<string, []>("op_17_promoted_2"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_213 = pow(x = hidden_states_15, y = var_17_promoted_2)[name = tensor<string, []>("op_213")];
tensor<int32, [1]> variance_5_axes_0 = const()[name = tensor<string, []>("variance_5_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_5_keep_dims_0 = const()[name = tensor<string, []>("variance_5_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_5 = reduce_mean(axes = variance_5_axes_0, keep_dims = variance_5_keep_dims_0, x = var_213)[name = tensor<string, []>("variance_5")];
tensor<fp32, []> var_216 = const()[name = tensor<string, []>("op_216"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_217 = add(x = variance_5, y = var_216)[name = tensor<string, []>("op_217")];
tensor<fp32, []> var_218_epsilon_0 = const()[name = tensor<string, []>("op_218_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_218 = rsqrt(epsilon = var_218_epsilon_0, x = var_217)[name = tensor<string, []>("op_218")];
tensor<fp32, [1, ?, 256]> hidden_states_19 = mul(x = hidden_states_15, y = var_218)[name = tensor<string, []>("hidden_states_19")];
tensor<fp32, [1, ?, 256]> hidden_states_21 = mul(x = encoder_block_1_layer_0_layer_norm_weight, y = hidden_states_19)[name = tensor<string, []>("hidden_states_21")];
tensor<fp32, [1, ?, 384]> states_9 = linear(bias = linear_0_bias_0, weight = encoder_block_1_layer_0_SelfAttention_q_weight, x = hidden_states_21)[name = tensor<string, []>("linear_7")];
tensor<int32, [4]> var_231 = const()[name = tensor<string, []>("op_231"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_232 = reshape(shape = var_231, x = states_9)[name = tensor<string, []>("op_232")];
tensor<fp32, [1, ?, 384]> states_11 = linear(bias = linear_0_bias_0, weight = encoder_block_1_layer_0_SelfAttention_k_weight, x = hidden_states_21)[name = tensor<string, []>("linear_8")];
tensor<int32, [4]> var_236 = const()[name = tensor<string, []>("op_236"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_237 = reshape(shape = var_236, x = states_11)[name = tensor<string, []>("op_237")];
tensor<fp32, [1, ?, 384]> states_13 = linear(bias = linear_0_bias_0, weight = encoder_block_1_layer_0_SelfAttention_v_weight, x = hidden_states_21)[name = tensor<string, []>("linear_9")];
tensor<int32, [4]> var_241 = const()[name = tensor<string, []>("op_241"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_242 = reshape(shape = var_241, x = states_13)[name = tensor<string, []>("op_242")];
tensor<int32, [4]> value_states_3_perm_0 = const()[name = tensor<string, []>("value_states_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> scores_5_transpose_x_0 = const()[name = tensor<string, []>("scores_5_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_5_transpose_y_0 = const()[name = tensor<string, []>("scores_5_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_38_perm_0 = const()[name = tensor<string, []>("transpose_38_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_39_perm_0 = const()[name = tensor<string, []>("transpose_39_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp32, [1, 6, 64, ?]> transpose_39 = transpose(perm = transpose_39_perm_0, x = var_237)[name = tensor<string, []>("transpose_101")];
tensor<fp32, [1, 6, ?, 64]> transpose_38 = transpose(perm = transpose_38_perm_0, x = var_232)[name = tensor<string, []>("transpose_102")];
tensor<fp32, [1, 6, ?, ?]> scores_5 = matmul(transpose_x = scores_5_transpose_x_0, transpose_y = scores_5_transpose_y_0, x = transpose_38, y = transpose_39)[name = tensor<string, []>("scores_5")];
tensor<fp32, [1, 6, ?, ?]> scores_7 = add(x = scores_5, y = position_bias)[name = tensor<string, []>("scores_7")];
tensor<fp32, [1, 6, ?, ?]> var_248 = softmax(axis = var_21, x = scores_7)[name = tensor<string, []>("op_248")];
tensor<bool, []> states_15_transpose_x_0 = const()[name = tensor<string, []>("states_15_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> states_15_transpose_y_0 = const()[name = tensor<string, []>("states_15_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp32, [1, 6, ?, 64]> value_states_3 = transpose(perm = value_states_3_perm_0, x = var_242)[name = tensor<string, []>("transpose_103")];
tensor<fp32, [1, 6, ?, 64]> states_15 = matmul(transpose_x = states_15_transpose_x_0, transpose_y = states_15_transpose_y_0, x = var_248, y = value_states_3)[name = tensor<string, []>("states_15")];
tensor<int32, [4]> var_252_perm_0 = const()[name = tensor<string, []>("op_252_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_254 = const()[name = tensor<string, []>("op_254"), val = tensor<int32, [3]>([1, -1, 384])];
tensor<fp32, [1, ?, 6, 64]> var_252 = transpose(perm = var_252_perm_0, x = states_15)[name = tensor<string, []>("transpose_100")];
tensor<fp32, [1, ?, 384]> input_29 = reshape(shape = var_254, x = var_252)[name = tensor<string, []>("input_29")];
tensor<fp32, [1, ?, 256]> input_31 = linear(bias = linear_3_bias_0, weight = encoder_block_1_layer_0_SelfAttention_o_weight, x = input_29)[name = tensor<string, []>("linear_10")];
tensor<fp32, [1, ?, 256]> hidden_states_23 = add(x = hidden_states_15, y = input_31)[name = tensor<string, []>("hidden_states_23")];
tensor<fp32, []> var_17_promoted_3 = const()[name = tensor<string, []>("op_17_promoted_3"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_264 = pow(x = hidden_states_23, y = var_17_promoted_3)[name = tensor<string, []>("op_264")];
tensor<int32, [1]> variance_7_axes_0 = const()[name = tensor<string, []>("variance_7_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_7_keep_dims_0 = const()[name = tensor<string, []>("variance_7_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_7 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = var_264)[name = tensor<string, []>("variance_7")];
tensor<fp32, []> var_267 = const()[name = tensor<string, []>("op_267"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_268 = add(x = variance_7, y = var_267)[name = tensor<string, []>("op_268")];
tensor<fp32, []> var_269_epsilon_0 = const()[name = tensor<string, []>("op_269_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_269 = rsqrt(epsilon = var_269_epsilon_0, x = var_268)[name = tensor<string, []>("op_269")];
tensor<fp32, [1, ?, 256]> hidden_states_27 = mul(x = hidden_states_23, y = var_269)[name = tensor<string, []>("hidden_states_27")];
tensor<fp32, [1, ?, 256]> input_33 = mul(x = encoder_block_1_layer_1_layer_norm_weight, y = hidden_states_27)[name = tensor<string, []>("input_33")];
tensor<fp32, [1, ?, 1024]> input_35 = linear(bias = linear_4_bias_0, weight = encoder_block_1_layer_1_DenseReluDense_wi_0_weight, x = input_33)[name = tensor<string, []>("linear_11")];
tensor<string, []> hidden_gelu_3_mode_0 = const()[name = tensor<string, []>("hidden_gelu_3_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp32, [1, ?, 1024]> hidden_gelu_3 = gelu(mode = hidden_gelu_3_mode_0, x = input_35)[name = tensor<string, []>("hidden_gelu_3")];
tensor<fp32, [1, ?, 1024]> hidden_linear_3 = linear(bias = linear_4_bias_0, weight = encoder_block_1_layer_1_DenseReluDense_wi_1_weight, x = input_33)[name = tensor<string, []>("linear_12")];
tensor<fp32, [1, ?, 1024]> input_37 = mul(x = hidden_gelu_3, y = hidden_linear_3)[name = tensor<string, []>("input_37")];
tensor<fp32, [1, ?, 256]> input_41 = linear(bias = linear_3_bias_0, weight = encoder_block_1_layer_1_DenseReluDense_wo_weight, x = input_37)[name = tensor<string, []>("linear_13")];
tensor<fp32, [1, ?, 256]> hidden_states_29 = add(x = hidden_states_23, y = input_41)[name = tensor<string, []>("hidden_states_29")];
tensor<fp32, []> var_17_promoted_4 = const()[name = tensor<string, []>("op_17_promoted_4"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_308 = pow(x = hidden_states_29, y = var_17_promoted_4)[name = tensor<string, []>("op_308")];
tensor<int32, [1]> variance_9_axes_0 = const()[name = tensor<string, []>("variance_9_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_9_keep_dims_0 = const()[name = tensor<string, []>("variance_9_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_9 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = var_308)[name = tensor<string, []>("variance_9")];
tensor<fp32, []> var_311 = const()[name = tensor<string, []>("op_311"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_312 = add(x = variance_9, y = var_311)[name = tensor<string, []>("op_312")];
tensor<fp32, []> var_313_epsilon_0 = const()[name = tensor<string, []>("op_313_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_313 = rsqrt(epsilon = var_313_epsilon_0, x = var_312)[name = tensor<string, []>("op_313")];
tensor<fp32, [1, ?, 256]> hidden_states_33 = mul(x = hidden_states_29, y = var_313)[name = tensor<string, []>("hidden_states_33")];
tensor<fp32, [1, ?, 256]> hidden_states_35 = mul(x = encoder_block_2_layer_0_layer_norm_weight, y = hidden_states_33)[name = tensor<string, []>("hidden_states_35")];
tensor<fp32, [1, ?, 384]> states_17 = linear(bias = linear_0_bias_0, weight = encoder_block_2_layer_0_SelfAttention_q_weight, x = hidden_states_35)[name = tensor<string, []>("linear_14")];
tensor<int32, [4]> var_326 = const()[name = tensor<string, []>("op_326"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_327 = reshape(shape = var_326, x = states_17)[name = tensor<string, []>("op_327")];
tensor<fp32, [1, ?, 384]> states_19 = linear(bias = linear_0_bias_0, weight = encoder_block_2_layer_0_SelfAttention_k_weight, x = hidden_states_35)[name = tensor<string, []>("linear_15")];
tensor<int32, [4]> var_331 = const()[name = tensor<string, []>("op_331"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_332 = reshape(shape = var_331, x = states_19)[name = tensor<string, []>("op_332")];
tensor<fp32, [1, ?, 384]> states_21 = linear(bias = linear_0_bias_0, weight = encoder_block_2_layer_0_SelfAttention_v_weight, x = hidden_states_35)[name = tensor<string, []>("linear_16")];
tensor<int32, [4]> var_336 = const()[name = tensor<string, []>("op_336"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_337 = reshape(shape = var_336, x = states_21)[name = tensor<string, []>("op_337")];
tensor<int32, [4]> value_states_5_perm_0 = const()[name = tensor<string, []>("value_states_5_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> scores_9_transpose_x_0 = const()[name = tensor<string, []>("scores_9_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_9_transpose_y_0 = const()[name = tensor<string, []>("scores_9_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_40_perm_0 = const()[name = tensor<string, []>("transpose_40_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_41_perm_0 = const()[name = tensor<string, []>("transpose_41_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp32, [1, 6, 64, ?]> transpose_41 = transpose(perm = transpose_41_perm_0, x = var_332)[name = tensor<string, []>("transpose_97")];
tensor<fp32, [1, 6, ?, 64]> transpose_40 = transpose(perm = transpose_40_perm_0, x = var_327)[name = tensor<string, []>("transpose_98")];
tensor<fp32, [1, 6, ?, ?]> scores_9 = matmul(transpose_x = scores_9_transpose_x_0, transpose_y = scores_9_transpose_y_0, x = transpose_40, y = transpose_41)[name = tensor<string, []>("scores_9")];
tensor<fp32, [1, 6, ?, ?]> scores_11 = add(x = scores_9, y = position_bias)[name = tensor<string, []>("scores_11")];
tensor<fp32, [1, 6, ?, ?]> var_343 = softmax(axis = var_21, x = scores_11)[name = tensor<string, []>("op_343")];
tensor<bool, []> states_23_transpose_x_0 = const()[name = tensor<string, []>("states_23_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> states_23_transpose_y_0 = const()[name = tensor<string, []>("states_23_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp32, [1, 6, ?, 64]> value_states_5 = transpose(perm = value_states_5_perm_0, x = var_337)[name = tensor<string, []>("transpose_99")];
tensor<fp32, [1, 6, ?, 64]> states_23 = matmul(transpose_x = states_23_transpose_x_0, transpose_y = states_23_transpose_y_0, x = var_343, y = value_states_5)[name = tensor<string, []>("states_23")];
tensor<int32, [4]> var_347_perm_0 = const()[name = tensor<string, []>("op_347_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_349 = const()[name = tensor<string, []>("op_349"), val = tensor<int32, [3]>([1, -1, 384])];
tensor<fp32, [1, ?, 6, 64]> var_347 = transpose(perm = var_347_perm_0, x = states_23)[name = tensor<string, []>("transpose_96")];
tensor<fp32, [1, ?, 384]> input_47 = reshape(shape = var_349, x = var_347)[name = tensor<string, []>("input_47")];
tensor<fp32, [1, ?, 256]> input_49 = linear(bias = linear_3_bias_0, weight = encoder_block_2_layer_0_SelfAttention_o_weight, x = input_47)[name = tensor<string, []>("linear_17")];
tensor<fp32, [1, ?, 256]> hidden_states_37 = add(x = hidden_states_29, y = input_49)[name = tensor<string, []>("hidden_states_37")];
tensor<fp32, []> var_17_promoted_5 = const()[name = tensor<string, []>("op_17_promoted_5"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_359 = pow(x = hidden_states_37, y = var_17_promoted_5)[name = tensor<string, []>("op_359")];
tensor<int32, [1]> variance_11_axes_0 = const()[name = tensor<string, []>("variance_11_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_11_keep_dims_0 = const()[name = tensor<string, []>("variance_11_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_11 = reduce_mean(axes = variance_11_axes_0, keep_dims = variance_11_keep_dims_0, x = var_359)[name = tensor<string, []>("variance_11")];
tensor<fp32, []> var_362 = const()[name = tensor<string, []>("op_362"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_363 = add(x = variance_11, y = var_362)[name = tensor<string, []>("op_363")];
tensor<fp32, []> var_364_epsilon_0 = const()[name = tensor<string, []>("op_364_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_364 = rsqrt(epsilon = var_364_epsilon_0, x = var_363)[name = tensor<string, []>("op_364")];
tensor<fp32, [1, ?, 256]> hidden_states_41 = mul(x = hidden_states_37, y = var_364)[name = tensor<string, []>("hidden_states_41")];
tensor<fp32, [1, ?, 256]> input_51 = mul(x = encoder_block_2_layer_1_layer_norm_weight, y = hidden_states_41)[name = tensor<string, []>("input_51")];
tensor<fp32, [1, ?, 1024]> input_53 = linear(bias = linear_4_bias_0, weight = encoder_block_2_layer_1_DenseReluDense_wi_0_weight, x = input_51)[name = tensor<string, []>("linear_18")];
tensor<string, []> hidden_gelu_5_mode_0 = const()[name = tensor<string, []>("hidden_gelu_5_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp32, [1, ?, 1024]> hidden_gelu_5 = gelu(mode = hidden_gelu_5_mode_0, x = input_53)[name = tensor<string, []>("hidden_gelu_5")];
tensor<fp32, [1, ?, 1024]> hidden_linear_5 = linear(bias = linear_4_bias_0, weight = encoder_block_2_layer_1_DenseReluDense_wi_1_weight, x = input_51)[name = tensor<string, []>("linear_19")];
tensor<fp32, [1, ?, 1024]> input_55 = mul(x = hidden_gelu_5, y = hidden_linear_5)[name = tensor<string, []>("input_55")];
tensor<fp32, [1, ?, 256]> input_59 = linear(bias = linear_3_bias_0, weight = encoder_block_2_layer_1_DenseReluDense_wo_weight, x = input_55)[name = tensor<string, []>("linear_20")];
tensor<fp32, [1, ?, 256]> hidden_states_43 = add(x = hidden_states_37, y = input_59)[name = tensor<string, []>("hidden_states_43")];
tensor<fp32, []> var_17_promoted_6 = const()[name = tensor<string, []>("op_17_promoted_6"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_403 = pow(x = hidden_states_43, y = var_17_promoted_6)[name = tensor<string, []>("op_403")];
tensor<int32, [1]> variance_13_axes_0 = const()[name = tensor<string, []>("variance_13_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_13_keep_dims_0 = const()[name = tensor<string, []>("variance_13_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_13 = reduce_mean(axes = variance_13_axes_0, keep_dims = variance_13_keep_dims_0, x = var_403)[name = tensor<string, []>("variance_13")];
tensor<fp32, []> var_406 = const()[name = tensor<string, []>("op_406"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_407 = add(x = variance_13, y = var_406)[name = tensor<string, []>("op_407")];
tensor<fp32, []> var_408_epsilon_0 = const()[name = tensor<string, []>("op_408_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_408 = rsqrt(epsilon = var_408_epsilon_0, x = var_407)[name = tensor<string, []>("op_408")];
tensor<fp32, [1, ?, 256]> hidden_states_47 = mul(x = hidden_states_43, y = var_408)[name = tensor<string, []>("hidden_states_47")];
tensor<fp32, [1, ?, 256]> hidden_states_49 = mul(x = encoder_block_3_layer_0_layer_norm_weight, y = hidden_states_47)[name = tensor<string, []>("hidden_states_49")];
tensor<fp32, [1, ?, 384]> states_25 = linear(bias = linear_0_bias_0, weight = encoder_block_3_layer_0_SelfAttention_q_weight, x = hidden_states_49)[name = tensor<string, []>("linear_21")];
tensor<int32, [4]> var_421 = const()[name = tensor<string, []>("op_421"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_422 = reshape(shape = var_421, x = states_25)[name = tensor<string, []>("op_422")];
tensor<fp32, [1, ?, 384]> states_27 = linear(bias = linear_0_bias_0, weight = encoder_block_3_layer_0_SelfAttention_k_weight, x = hidden_states_49)[name = tensor<string, []>("linear_22")];
tensor<int32, [4]> var_426 = const()[name = tensor<string, []>("op_426"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_427 = reshape(shape = var_426, x = states_27)[name = tensor<string, []>("op_427")];
tensor<fp32, [1, ?, 384]> states_29 = linear(bias = linear_0_bias_0, weight = encoder_block_3_layer_0_SelfAttention_v_weight, x = hidden_states_49)[name = tensor<string, []>("linear_23")];
tensor<int32, [4]> var_431 = const()[name = tensor<string, []>("op_431"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_432 = reshape(shape = var_431, x = states_29)[name = tensor<string, []>("op_432")];
tensor<int32, [4]> value_states_7_perm_0 = const()[name = tensor<string, []>("value_states_7_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> scores_13_transpose_x_0 = const()[name = tensor<string, []>("scores_13_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_13_transpose_y_0 = const()[name = tensor<string, []>("scores_13_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_42_perm_0 = const()[name = tensor<string, []>("transpose_42_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_43_perm_0 = const()[name = tensor<string, []>("transpose_43_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp32, [1, 6, 64, ?]> transpose_43 = transpose(perm = transpose_43_perm_0, x = var_427)[name = tensor<string, []>("transpose_93")];
tensor<fp32, [1, 6, ?, 64]> transpose_42 = transpose(perm = transpose_42_perm_0, x = var_422)[name = tensor<string, []>("transpose_94")];
tensor<fp32, [1, 6, ?, ?]> scores_13 = matmul(transpose_x = scores_13_transpose_x_0, transpose_y = scores_13_transpose_y_0, x = transpose_42, y = transpose_43)[name = tensor<string, []>("scores_13")];
tensor<fp32, [1, 6, ?, ?]> scores_15 = add(x = scores_13, y = position_bias)[name = tensor<string, []>("scores_15")];
tensor<fp32, [1, 6, ?, ?]> var_438 = softmax(axis = var_21, x = scores_15)[name = tensor<string, []>("op_438")];
tensor<bool, []> states_31_transpose_x_0 = const()[name = tensor<string, []>("states_31_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> states_31_transpose_y_0 = const()[name = tensor<string, []>("states_31_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp32, [1, 6, ?, 64]> value_states_7 = transpose(perm = value_states_7_perm_0, x = var_432)[name = tensor<string, []>("transpose_95")];
tensor<fp32, [1, 6, ?, 64]> states_31 = matmul(transpose_x = states_31_transpose_x_0, transpose_y = states_31_transpose_y_0, x = var_438, y = value_states_7)[name = tensor<string, []>("states_31")];
tensor<int32, [4]> var_442_perm_0 = const()[name = tensor<string, []>("op_442_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_444 = const()[name = tensor<string, []>("op_444"), val = tensor<int32, [3]>([1, -1, 384])];
tensor<fp32, [1, ?, 6, 64]> var_442 = transpose(perm = var_442_perm_0, x = states_31)[name = tensor<string, []>("transpose_92")];
tensor<fp32, [1, ?, 384]> input_65 = reshape(shape = var_444, x = var_442)[name = tensor<string, []>("input_65")];
tensor<fp32, [1, ?, 256]> input_67 = linear(bias = linear_3_bias_0, weight = encoder_block_3_layer_0_SelfAttention_o_weight, x = input_65)[name = tensor<string, []>("linear_24")];
tensor<fp32, [1, ?, 256]> hidden_states_51 = add(x = hidden_states_43, y = input_67)[name = tensor<string, []>("hidden_states_51")];
tensor<fp32, []> var_17_promoted_7 = const()[name = tensor<string, []>("op_17_promoted_7"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_454 = pow(x = hidden_states_51, y = var_17_promoted_7)[name = tensor<string, []>("op_454")];
tensor<int32, [1]> variance_15_axes_0 = const()[name = tensor<string, []>("variance_15_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_15_keep_dims_0 = const()[name = tensor<string, []>("variance_15_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_15 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = var_454)[name = tensor<string, []>("variance_15")];
tensor<fp32, []> var_457 = const()[name = tensor<string, []>("op_457"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_458 = add(x = variance_15, y = var_457)[name = tensor<string, []>("op_458")];
tensor<fp32, []> var_459_epsilon_0 = const()[name = tensor<string, []>("op_459_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_459 = rsqrt(epsilon = var_459_epsilon_0, x = var_458)[name = tensor<string, []>("op_459")];
tensor<fp32, [1, ?, 256]> hidden_states_55 = mul(x = hidden_states_51, y = var_459)[name = tensor<string, []>("hidden_states_55")];
tensor<fp32, [1, ?, 256]> input_69 = mul(x = encoder_block_3_layer_1_layer_norm_weight, y = hidden_states_55)[name = tensor<string, []>("input_69")];
tensor<fp32, [1, ?, 1024]> input_71 = linear(bias = linear_4_bias_0, weight = encoder_block_3_layer_1_DenseReluDense_wi_0_weight, x = input_69)[name = tensor<string, []>("linear_25")];
tensor<string, []> hidden_gelu_7_mode_0 = const()[name = tensor<string, []>("hidden_gelu_7_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp32, [1, ?, 1024]> hidden_gelu_7 = gelu(mode = hidden_gelu_7_mode_0, x = input_71)[name = tensor<string, []>("hidden_gelu_7")];
tensor<fp32, [1, ?, 1024]> hidden_linear_7 = linear(bias = linear_4_bias_0, weight = encoder_block_3_layer_1_DenseReluDense_wi_1_weight, x = input_69)[name = tensor<string, []>("linear_26")];
tensor<fp32, [1, ?, 1024]> input_73 = mul(x = hidden_gelu_7, y = hidden_linear_7)[name = tensor<string, []>("input_73")];
tensor<fp32, [1, ?, 256]> input_77 = linear(bias = linear_3_bias_0, weight = encoder_block_3_layer_1_DenseReluDense_wo_weight, x = input_73)[name = tensor<string, []>("linear_27")];
tensor<fp32, [1, ?, 256]> hidden_states_57 = add(x = hidden_states_51, y = input_77)[name = tensor<string, []>("hidden_states_57")];
tensor<fp32, []> var_17_promoted_8 = const()[name = tensor<string, []>("op_17_promoted_8"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_498 = pow(x = hidden_states_57, y = var_17_promoted_8)[name = tensor<string, []>("op_498")];
tensor<int32, [1]> variance_17_axes_0 = const()[name = tensor<string, []>("variance_17_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_17_keep_dims_0 = const()[name = tensor<string, []>("variance_17_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_17 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = var_498)[name = tensor<string, []>("variance_17")];
tensor<fp32, []> var_501 = const()[name = tensor<string, []>("op_501"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_502 = add(x = variance_17, y = var_501)[name = tensor<string, []>("op_502")];
tensor<fp32, []> var_503_epsilon_0 = const()[name = tensor<string, []>("op_503_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_503 = rsqrt(epsilon = var_503_epsilon_0, x = var_502)[name = tensor<string, []>("op_503")];
tensor<fp32, [1, ?, 256]> hidden_states_61 = mul(x = hidden_states_57, y = var_503)[name = tensor<string, []>("hidden_states_61")];
tensor<fp32, [1, ?, 256]> hidden_states_63 = mul(x = encoder_block_4_layer_0_layer_norm_weight, y = hidden_states_61)[name = tensor<string, []>("hidden_states_63")];
tensor<fp32, [1, ?, 384]> states_33 = linear(bias = linear_0_bias_0, weight = encoder_block_4_layer_0_SelfAttention_q_weight, x = hidden_states_63)[name = tensor<string, []>("linear_28")];
tensor<int32, [4]> var_516 = const()[name = tensor<string, []>("op_516"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_517 = reshape(shape = var_516, x = states_33)[name = tensor<string, []>("op_517")];
tensor<fp32, [1, ?, 384]> states_35 = linear(bias = linear_0_bias_0, weight = encoder_block_4_layer_0_SelfAttention_k_weight, x = hidden_states_63)[name = tensor<string, []>("linear_29")];
tensor<int32, [4]> var_521 = const()[name = tensor<string, []>("op_521"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_522 = reshape(shape = var_521, x = states_35)[name = tensor<string, []>("op_522")];
tensor<fp32, [1, ?, 384]> states_37 = linear(bias = linear_0_bias_0, weight = encoder_block_4_layer_0_SelfAttention_v_weight, x = hidden_states_63)[name = tensor<string, []>("linear_30")];
tensor<int32, [4]> var_526 = const()[name = tensor<string, []>("op_526"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_527 = reshape(shape = var_526, x = states_37)[name = tensor<string, []>("op_527")];
tensor<int32, [4]> value_states_9_perm_0 = const()[name = tensor<string, []>("value_states_9_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> scores_17_transpose_x_0 = const()[name = tensor<string, []>("scores_17_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_17_transpose_y_0 = const()[name = tensor<string, []>("scores_17_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_44_perm_0 = const()[name = tensor<string, []>("transpose_44_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_45_perm_0 = const()[name = tensor<string, []>("transpose_45_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp32, [1, 6, 64, ?]> transpose_45 = transpose(perm = transpose_45_perm_0, x = var_522)[name = tensor<string, []>("transpose_89")];
tensor<fp32, [1, 6, ?, 64]> transpose_44 = transpose(perm = transpose_44_perm_0, x = var_517)[name = tensor<string, []>("transpose_90")];
tensor<fp32, [1, 6, ?, ?]> scores_17 = matmul(transpose_x = scores_17_transpose_x_0, transpose_y = scores_17_transpose_y_0, x = transpose_44, y = transpose_45)[name = tensor<string, []>("scores_17")];
tensor<fp32, [1, 6, ?, ?]> scores_19 = add(x = scores_17, y = position_bias)[name = tensor<string, []>("scores_19")];
tensor<fp32, [1, 6, ?, ?]> var_533 = softmax(axis = var_21, x = scores_19)[name = tensor<string, []>("op_533")];
tensor<bool, []> states_39_transpose_x_0 = const()[name = tensor<string, []>("states_39_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> states_39_transpose_y_0 = const()[name = tensor<string, []>("states_39_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp32, [1, 6, ?, 64]> value_states_9 = transpose(perm = value_states_9_perm_0, x = var_527)[name = tensor<string, []>("transpose_91")];
tensor<fp32, [1, 6, ?, 64]> states_39 = matmul(transpose_x = states_39_transpose_x_0, transpose_y = states_39_transpose_y_0, x = var_533, y = value_states_9)[name = tensor<string, []>("states_39")];
tensor<int32, [4]> var_537_perm_0 = const()[name = tensor<string, []>("op_537_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_539 = const()[name = tensor<string, []>("op_539"), val = tensor<int32, [3]>([1, -1, 384])];
tensor<fp32, [1, ?, 6, 64]> var_537 = transpose(perm = var_537_perm_0, x = states_39)[name = tensor<string, []>("transpose_88")];
tensor<fp32, [1, ?, 384]> input_83 = reshape(shape = var_539, x = var_537)[name = tensor<string, []>("input_83")];
tensor<fp32, [1, ?, 256]> input_85 = linear(bias = linear_3_bias_0, weight = encoder_block_4_layer_0_SelfAttention_o_weight, x = input_83)[name = tensor<string, []>("linear_31")];
tensor<fp32, [1, ?, 256]> hidden_states_65 = add(x = hidden_states_57, y = input_85)[name = tensor<string, []>("hidden_states_65")];
tensor<fp32, []> var_17_promoted_9 = const()[name = tensor<string, []>("op_17_promoted_9"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_549 = pow(x = hidden_states_65, y = var_17_promoted_9)[name = tensor<string, []>("op_549")];
tensor<int32, [1]> variance_19_axes_0 = const()[name = tensor<string, []>("variance_19_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_19_keep_dims_0 = const()[name = tensor<string, []>("variance_19_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_19 = reduce_mean(axes = variance_19_axes_0, keep_dims = variance_19_keep_dims_0, x = var_549)[name = tensor<string, []>("variance_19")];
tensor<fp32, []> var_552 = const()[name = tensor<string, []>("op_552"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_553 = add(x = variance_19, y = var_552)[name = tensor<string, []>("op_553")];
tensor<fp32, []> var_554_epsilon_0 = const()[name = tensor<string, []>("op_554_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_554 = rsqrt(epsilon = var_554_epsilon_0, x = var_553)[name = tensor<string, []>("op_554")];
tensor<fp32, [1, ?, 256]> hidden_states_69 = mul(x = hidden_states_65, y = var_554)[name = tensor<string, []>("hidden_states_69")];
tensor<fp32, [1, ?, 256]> input_87 = mul(x = encoder_block_4_layer_1_layer_norm_weight, y = hidden_states_69)[name = tensor<string, []>("input_87")];
tensor<fp32, [1, ?, 1024]> input_89 = linear(bias = linear_4_bias_0, weight = encoder_block_4_layer_1_DenseReluDense_wi_0_weight, x = input_87)[name = tensor<string, []>("linear_32")];
tensor<string, []> hidden_gelu_9_mode_0 = const()[name = tensor<string, []>("hidden_gelu_9_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp32, [1, ?, 1024]> hidden_gelu_9 = gelu(mode = hidden_gelu_9_mode_0, x = input_89)[name = tensor<string, []>("hidden_gelu_9")];
tensor<fp32, [1, ?, 1024]> hidden_linear_9 = linear(bias = linear_4_bias_0, weight = encoder_block_4_layer_1_DenseReluDense_wi_1_weight, x = input_87)[name = tensor<string, []>("linear_33")];
tensor<fp32, [1, ?, 1024]> input_91 = mul(x = hidden_gelu_9, y = hidden_linear_9)[name = tensor<string, []>("input_91")];
tensor<fp32, [1, ?, 256]> input_95 = linear(bias = linear_3_bias_0, weight = encoder_block_4_layer_1_DenseReluDense_wo_weight, x = input_91)[name = tensor<string, []>("linear_34")];
tensor<fp32, [1, ?, 256]> hidden_states_71 = add(x = hidden_states_65, y = input_95)[name = tensor<string, []>("hidden_states_71")];
tensor<fp32, []> var_17_promoted_10 = const()[name = tensor<string, []>("op_17_promoted_10"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_593 = pow(x = hidden_states_71, y = var_17_promoted_10)[name = tensor<string, []>("op_593")];
tensor<int32, [1]> variance_21_axes_0 = const()[name = tensor<string, []>("variance_21_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_21_keep_dims_0 = const()[name = tensor<string, []>("variance_21_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_21 = reduce_mean(axes = variance_21_axes_0, keep_dims = variance_21_keep_dims_0, x = var_593)[name = tensor<string, []>("variance_21")];
tensor<fp32, []> var_596 = const()[name = tensor<string, []>("op_596"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_597 = add(x = variance_21, y = var_596)[name = tensor<string, []>("op_597")];
tensor<fp32, []> var_598_epsilon_0 = const()[name = tensor<string, []>("op_598_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_598 = rsqrt(epsilon = var_598_epsilon_0, x = var_597)[name = tensor<string, []>("op_598")];
tensor<fp32, [1, ?, 256]> hidden_states_75 = mul(x = hidden_states_71, y = var_598)[name = tensor<string, []>("hidden_states_75")];
tensor<fp32, [1, ?, 256]> hidden_states_77 = mul(x = encoder_block_5_layer_0_layer_norm_weight, y = hidden_states_75)[name = tensor<string, []>("hidden_states_77")];
tensor<fp32, [1, ?, 384]> states_41 = linear(bias = linear_0_bias_0, weight = encoder_block_5_layer_0_SelfAttention_q_weight, x = hidden_states_77)[name = tensor<string, []>("linear_35")];
tensor<int32, [4]> var_611 = const()[name = tensor<string, []>("op_611"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_612 = reshape(shape = var_611, x = states_41)[name = tensor<string, []>("op_612")];
tensor<fp32, [1, ?, 384]> states_43 = linear(bias = linear_0_bias_0, weight = encoder_block_5_layer_0_SelfAttention_k_weight, x = hidden_states_77)[name = tensor<string, []>("linear_36")];
tensor<int32, [4]> var_616 = const()[name = tensor<string, []>("op_616"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_617 = reshape(shape = var_616, x = states_43)[name = tensor<string, []>("op_617")];
tensor<fp32, [1, ?, 384]> states_45 = linear(bias = linear_0_bias_0, weight = encoder_block_5_layer_0_SelfAttention_v_weight, x = hidden_states_77)[name = tensor<string, []>("linear_37")];
tensor<int32, [4]> var_621 = const()[name = tensor<string, []>("op_621"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_622 = reshape(shape = var_621, x = states_45)[name = tensor<string, []>("op_622")];
tensor<int32, [4]> value_states_11_perm_0 = const()[name = tensor<string, []>("value_states_11_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> scores_21_transpose_x_0 = const()[name = tensor<string, []>("scores_21_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_21_transpose_y_0 = const()[name = tensor<string, []>("scores_21_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_46_perm_0 = const()[name = tensor<string, []>("transpose_46_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_47_perm_0 = const()[name = tensor<string, []>("transpose_47_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp32, [1, 6, 64, ?]> transpose_47 = transpose(perm = transpose_47_perm_0, x = var_617)[name = tensor<string, []>("transpose_85")];
tensor<fp32, [1, 6, ?, 64]> transpose_46 = transpose(perm = transpose_46_perm_0, x = var_612)[name = tensor<string, []>("transpose_86")];
tensor<fp32, [1, 6, ?, ?]> scores_21 = matmul(transpose_x = scores_21_transpose_x_0, transpose_y = scores_21_transpose_y_0, x = transpose_46, y = transpose_47)[name = tensor<string, []>("scores_21")];
tensor<fp32, [1, 6, ?, ?]> scores_23 = add(x = scores_21, y = position_bias)[name = tensor<string, []>("scores_23")];
tensor<fp32, [1, 6, ?, ?]> var_628 = softmax(axis = var_21, x = scores_23)[name = tensor<string, []>("op_628")];
tensor<bool, []> states_47_transpose_x_0 = const()[name = tensor<string, []>("states_47_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> states_47_transpose_y_0 = const()[name = tensor<string, []>("states_47_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp32, [1, 6, ?, 64]> value_states_11 = transpose(perm = value_states_11_perm_0, x = var_622)[name = tensor<string, []>("transpose_87")];
tensor<fp32, [1, 6, ?, 64]> states_47 = matmul(transpose_x = states_47_transpose_x_0, transpose_y = states_47_transpose_y_0, x = var_628, y = value_states_11)[name = tensor<string, []>("states_47")];
tensor<int32, [4]> var_632_perm_0 = const()[name = tensor<string, []>("op_632_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_634 = const()[name = tensor<string, []>("op_634"), val = tensor<int32, [3]>([1, -1, 384])];
tensor<fp32, [1, ?, 6, 64]> var_632 = transpose(perm = var_632_perm_0, x = states_47)[name = tensor<string, []>("transpose_84")];
tensor<fp32, [1, ?, 384]> input_101 = reshape(shape = var_634, x = var_632)[name = tensor<string, []>("input_101")];
tensor<fp32, [1, ?, 256]> input_103 = linear(bias = linear_3_bias_0, weight = encoder_block_5_layer_0_SelfAttention_o_weight, x = input_101)[name = tensor<string, []>("linear_38")];
tensor<fp32, [1, ?, 256]> hidden_states_79 = add(x = hidden_states_71, y = input_103)[name = tensor<string, []>("hidden_states_79")];
tensor<fp32, []> var_17_promoted_11 = const()[name = tensor<string, []>("op_17_promoted_11"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_644 = pow(x = hidden_states_79, y = var_17_promoted_11)[name = tensor<string, []>("op_644")];
tensor<int32, [1]> variance_23_axes_0 = const()[name = tensor<string, []>("variance_23_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_23_keep_dims_0 = const()[name = tensor<string, []>("variance_23_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_23 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = var_644)[name = tensor<string, []>("variance_23")];
tensor<fp32, []> var_647 = const()[name = tensor<string, []>("op_647"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_648 = add(x = variance_23, y = var_647)[name = tensor<string, []>("op_648")];
tensor<fp32, []> var_649_epsilon_0 = const()[name = tensor<string, []>("op_649_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_649 = rsqrt(epsilon = var_649_epsilon_0, x = var_648)[name = tensor<string, []>("op_649")];
tensor<fp32, [1, ?, 256]> hidden_states_83 = mul(x = hidden_states_79, y = var_649)[name = tensor<string, []>("hidden_states_83")];
tensor<fp32, [1, ?, 256]> input_105 = mul(x = encoder_block_5_layer_1_layer_norm_weight, y = hidden_states_83)[name = tensor<string, []>("input_105")];
tensor<fp32, [1, ?, 1024]> input_107 = linear(bias = linear_4_bias_0, weight = encoder_block_5_layer_1_DenseReluDense_wi_0_weight, x = input_105)[name = tensor<string, []>("linear_39")];
tensor<string, []> hidden_gelu_11_mode_0 = const()[name = tensor<string, []>("hidden_gelu_11_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp32, [1, ?, 1024]> hidden_gelu_11 = gelu(mode = hidden_gelu_11_mode_0, x = input_107)[name = tensor<string, []>("hidden_gelu_11")];
tensor<fp32, [1, ?, 1024]> hidden_linear_11 = linear(bias = linear_4_bias_0, weight = encoder_block_5_layer_1_DenseReluDense_wi_1_weight, x = input_105)[name = tensor<string, []>("linear_40")];
tensor<fp32, [1, ?, 1024]> input_109 = mul(x = hidden_gelu_11, y = hidden_linear_11)[name = tensor<string, []>("input_109")];
tensor<fp32, [1, ?, 256]> input_113 = linear(bias = linear_3_bias_0, weight = encoder_block_5_layer_1_DenseReluDense_wo_weight, x = input_109)[name = tensor<string, []>("linear_41")];
tensor<fp32, [1, ?, 256]> hidden_states_85 = add(x = hidden_states_79, y = input_113)[name = tensor<string, []>("hidden_states_85")];
tensor<fp32, []> var_17_promoted_12 = const()[name = tensor<string, []>("op_17_promoted_12"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_688 = pow(x = hidden_states_85, y = var_17_promoted_12)[name = tensor<string, []>("op_688")];
tensor<int32, [1]> variance_25_axes_0 = const()[name = tensor<string, []>("variance_25_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_25_keep_dims_0 = const()[name = tensor<string, []>("variance_25_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_25 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = var_688)[name = tensor<string, []>("variance_25")];
tensor<fp32, []> var_691 = const()[name = tensor<string, []>("op_691"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_692 = add(x = variance_25, y = var_691)[name = tensor<string, []>("op_692")];
tensor<fp32, []> var_693_epsilon_0 = const()[name = tensor<string, []>("op_693_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_693 = rsqrt(epsilon = var_693_epsilon_0, x = var_692)[name = tensor<string, []>("op_693")];
tensor<fp32, [1, ?, 256]> hidden_states_89 = mul(x = hidden_states_85, y = var_693)[name = tensor<string, []>("hidden_states_89")];
tensor<fp32, [1, ?, 256]> hidden_states_91 = mul(x = encoder_block_6_layer_0_layer_norm_weight, y = hidden_states_89)[name = tensor<string, []>("hidden_states_91")];
tensor<fp32, [1, ?, 384]> states_49 = linear(bias = linear_0_bias_0, weight = encoder_block_6_layer_0_SelfAttention_q_weight, x = hidden_states_91)[name = tensor<string, []>("linear_42")];
tensor<int32, [4]> var_706 = const()[name = tensor<string, []>("op_706"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_707 = reshape(shape = var_706, x = states_49)[name = tensor<string, []>("op_707")];
tensor<fp32, [1, ?, 384]> states_51 = linear(bias = linear_0_bias_0, weight = encoder_block_6_layer_0_SelfAttention_k_weight, x = hidden_states_91)[name = tensor<string, []>("linear_43")];
tensor<int32, [4]> var_711 = const()[name = tensor<string, []>("op_711"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_712 = reshape(shape = var_711, x = states_51)[name = tensor<string, []>("op_712")];
tensor<fp32, [1, ?, 384]> states_53 = linear(bias = linear_0_bias_0, weight = encoder_block_6_layer_0_SelfAttention_v_weight, x = hidden_states_91)[name = tensor<string, []>("linear_44")];
tensor<int32, [4]> var_716 = const()[name = tensor<string, []>("op_716"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_717 = reshape(shape = var_716, x = states_53)[name = tensor<string, []>("op_717")];
tensor<int32, [4]> value_states_13_perm_0 = const()[name = tensor<string, []>("value_states_13_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> scores_25_transpose_x_0 = const()[name = tensor<string, []>("scores_25_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_25_transpose_y_0 = const()[name = tensor<string, []>("scores_25_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_48_perm_0 = const()[name = tensor<string, []>("transpose_48_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_49_perm_0 = const()[name = tensor<string, []>("transpose_49_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp32, [1, 6, 64, ?]> transpose_49 = transpose(perm = transpose_49_perm_0, x = var_712)[name = tensor<string, []>("transpose_81")];
tensor<fp32, [1, 6, ?, 64]> transpose_48 = transpose(perm = transpose_48_perm_0, x = var_707)[name = tensor<string, []>("transpose_82")];
tensor<fp32, [1, 6, ?, ?]> scores_25 = matmul(transpose_x = scores_25_transpose_x_0, transpose_y = scores_25_transpose_y_0, x = transpose_48, y = transpose_49)[name = tensor<string, []>("scores_25")];
tensor<fp32, [1, 6, ?, ?]> scores_27 = add(x = scores_25, y = position_bias)[name = tensor<string, []>("scores_27")];
tensor<fp32, [1, 6, ?, ?]> var_723 = softmax(axis = var_21, x = scores_27)[name = tensor<string, []>("op_723")];
tensor<bool, []> states_55_transpose_x_0 = const()[name = tensor<string, []>("states_55_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> states_55_transpose_y_0 = const()[name = tensor<string, []>("states_55_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp32, [1, 6, ?, 64]> value_states_13 = transpose(perm = value_states_13_perm_0, x = var_717)[name = tensor<string, []>("transpose_83")];
tensor<fp32, [1, 6, ?, 64]> states_55 = matmul(transpose_x = states_55_transpose_x_0, transpose_y = states_55_transpose_y_0, x = var_723, y = value_states_13)[name = tensor<string, []>("states_55")];
tensor<int32, [4]> var_727_perm_0 = const()[name = tensor<string, []>("op_727_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_729 = const()[name = tensor<string, []>("op_729"), val = tensor<int32, [3]>([1, -1, 384])];
tensor<fp32, [1, ?, 6, 64]> var_727 = transpose(perm = var_727_perm_0, x = states_55)[name = tensor<string, []>("transpose_80")];
tensor<fp32, [1, ?, 384]> input_119 = reshape(shape = var_729, x = var_727)[name = tensor<string, []>("input_119")];
tensor<fp32, [1, ?, 256]> input_121 = linear(bias = linear_3_bias_0, weight = encoder_block_6_layer_0_SelfAttention_o_weight, x = input_119)[name = tensor<string, []>("linear_45")];
tensor<fp32, [1, ?, 256]> hidden_states_93 = add(x = hidden_states_85, y = input_121)[name = tensor<string, []>("hidden_states_93")];
tensor<fp32, []> var_17_promoted_13 = const()[name = tensor<string, []>("op_17_promoted_13"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_739 = pow(x = hidden_states_93, y = var_17_promoted_13)[name = tensor<string, []>("op_739")];
tensor<int32, [1]> variance_27_axes_0 = const()[name = tensor<string, []>("variance_27_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_27_keep_dims_0 = const()[name = tensor<string, []>("variance_27_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_27 = reduce_mean(axes = variance_27_axes_0, keep_dims = variance_27_keep_dims_0, x = var_739)[name = tensor<string, []>("variance_27")];
tensor<fp32, []> var_742 = const()[name = tensor<string, []>("op_742"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_743 = add(x = variance_27, y = var_742)[name = tensor<string, []>("op_743")];
tensor<fp32, []> var_744_epsilon_0 = const()[name = tensor<string, []>("op_744_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_744 = rsqrt(epsilon = var_744_epsilon_0, x = var_743)[name = tensor<string, []>("op_744")];
tensor<fp32, [1, ?, 256]> hidden_states_97 = mul(x = hidden_states_93, y = var_744)[name = tensor<string, []>("hidden_states_97")];
tensor<fp32, [1, ?, 256]> input_123 = mul(x = encoder_block_6_layer_1_layer_norm_weight, y = hidden_states_97)[name = tensor<string, []>("input_123")];
tensor<fp32, [1, ?, 1024]> input_125 = linear(bias = linear_4_bias_0, weight = encoder_block_6_layer_1_DenseReluDense_wi_0_weight, x = input_123)[name = tensor<string, []>("linear_46")];
tensor<string, []> hidden_gelu_13_mode_0 = const()[name = tensor<string, []>("hidden_gelu_13_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp32, [1, ?, 1024]> hidden_gelu_13 = gelu(mode = hidden_gelu_13_mode_0, x = input_125)[name = tensor<string, []>("hidden_gelu_13")];
tensor<fp32, [1, ?, 1024]> hidden_linear_13 = linear(bias = linear_4_bias_0, weight = encoder_block_6_layer_1_DenseReluDense_wi_1_weight, x = input_123)[name = tensor<string, []>("linear_47")];
tensor<fp32, [1, ?, 1024]> input_127 = mul(x = hidden_gelu_13, y = hidden_linear_13)[name = tensor<string, []>("input_127")];
tensor<fp32, [1, ?, 256]> input_131 = linear(bias = linear_3_bias_0, weight = encoder_block_6_layer_1_DenseReluDense_wo_weight, x = input_127)[name = tensor<string, []>("linear_48")];
tensor<fp32, [1, ?, 256]> hidden_states_99 = add(x = hidden_states_93, y = input_131)[name = tensor<string, []>("hidden_states_99")];
tensor<fp32, []> var_17_promoted_14 = const()[name = tensor<string, []>("op_17_promoted_14"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_783 = pow(x = hidden_states_99, y = var_17_promoted_14)[name = tensor<string, []>("op_783")];
tensor<int32, [1]> variance_29_axes_0 = const()[name = tensor<string, []>("variance_29_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_29_keep_dims_0 = const()[name = tensor<string, []>("variance_29_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_29 = reduce_mean(axes = variance_29_axes_0, keep_dims = variance_29_keep_dims_0, x = var_783)[name = tensor<string, []>("variance_29")];
tensor<fp32, []> var_786 = const()[name = tensor<string, []>("op_786"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_787 = add(x = variance_29, y = var_786)[name = tensor<string, []>("op_787")];
tensor<fp32, []> var_788_epsilon_0 = const()[name = tensor<string, []>("op_788_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_788 = rsqrt(epsilon = var_788_epsilon_0, x = var_787)[name = tensor<string, []>("op_788")];
tensor<fp32, [1, ?, 256]> hidden_states_103 = mul(x = hidden_states_99, y = var_788)[name = tensor<string, []>("hidden_states_103")];
tensor<fp32, [1, ?, 256]> hidden_states_105 = mul(x = encoder_block_7_layer_0_layer_norm_weight, y = hidden_states_103)[name = tensor<string, []>("hidden_states_105")];
tensor<fp32, [1, ?, 384]> states_57 = linear(bias = linear_0_bias_0, weight = encoder_block_7_layer_0_SelfAttention_q_weight, x = hidden_states_105)[name = tensor<string, []>("linear_49")];
tensor<int32, [4]> var_801 = const()[name = tensor<string, []>("op_801"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_802 = reshape(shape = var_801, x = states_57)[name = tensor<string, []>("op_802")];
tensor<fp32, [1, ?, 384]> states_59 = linear(bias = linear_0_bias_0, weight = encoder_block_7_layer_0_SelfAttention_k_weight, x = hidden_states_105)[name = tensor<string, []>("linear_50")];
tensor<int32, [4]> var_806 = const()[name = tensor<string, []>("op_806"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_807 = reshape(shape = var_806, x = states_59)[name = tensor<string, []>("op_807")];
tensor<fp32, [1, ?, 384]> states_61 = linear(bias = linear_0_bias_0, weight = encoder_block_7_layer_0_SelfAttention_v_weight, x = hidden_states_105)[name = tensor<string, []>("linear_51")];
tensor<int32, [4]> var_811 = const()[name = tensor<string, []>("op_811"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_812 = reshape(shape = var_811, x = states_61)[name = tensor<string, []>("op_812")];
tensor<int32, [4]> value_states_15_perm_0 = const()[name = tensor<string, []>("value_states_15_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> scores_29_transpose_x_0 = const()[name = tensor<string, []>("scores_29_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_29_transpose_y_0 = const()[name = tensor<string, []>("scores_29_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_50_perm_0 = const()[name = tensor<string, []>("transpose_50_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_51_perm_0 = const()[name = tensor<string, []>("transpose_51_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp32, [1, 6, 64, ?]> transpose_51 = transpose(perm = transpose_51_perm_0, x = var_807)[name = tensor<string, []>("transpose_77")];
tensor<fp32, [1, 6, ?, 64]> transpose_50 = transpose(perm = transpose_50_perm_0, x = var_802)[name = tensor<string, []>("transpose_78")];
tensor<fp32, [1, 6, ?, ?]> scores_29 = matmul(transpose_x = scores_29_transpose_x_0, transpose_y = scores_29_transpose_y_0, x = transpose_50, y = transpose_51)[name = tensor<string, []>("scores_29")];
tensor<fp32, [1, 6, ?, ?]> scores_31 = add(x = scores_29, y = position_bias)[name = tensor<string, []>("scores_31")];
tensor<fp32, [1, 6, ?, ?]> var_818 = softmax(axis = var_21, x = scores_31)[name = tensor<string, []>("op_818")];
tensor<bool, []> states_63_transpose_x_0 = const()[name = tensor<string, []>("states_63_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> states_63_transpose_y_0 = const()[name = tensor<string, []>("states_63_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp32, [1, 6, ?, 64]> value_states_15 = transpose(perm = value_states_15_perm_0, x = var_812)[name = tensor<string, []>("transpose_79")];
tensor<fp32, [1, 6, ?, 64]> states_63 = matmul(transpose_x = states_63_transpose_x_0, transpose_y = states_63_transpose_y_0, x = var_818, y = value_states_15)[name = tensor<string, []>("states_63")];
tensor<int32, [4]> var_822_perm_0 = const()[name = tensor<string, []>("op_822_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_824 = const()[name = tensor<string, []>("op_824"), val = tensor<int32, [3]>([1, -1, 384])];
tensor<fp32, [1, ?, 6, 64]> var_822 = transpose(perm = var_822_perm_0, x = states_63)[name = tensor<string, []>("transpose_76")];
tensor<fp32, [1, ?, 384]> input_137 = reshape(shape = var_824, x = var_822)[name = tensor<string, []>("input_137")];
tensor<fp32, [1, ?, 256]> input_139 = linear(bias = linear_3_bias_0, weight = encoder_block_7_layer_0_SelfAttention_o_weight, x = input_137)[name = tensor<string, []>("linear_52")];
tensor<fp32, [1, ?, 256]> hidden_states_107 = add(x = hidden_states_99, y = input_139)[name = tensor<string, []>("hidden_states_107")];
tensor<fp32, []> var_17_promoted_15 = const()[name = tensor<string, []>("op_17_promoted_15"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_834 = pow(x = hidden_states_107, y = var_17_promoted_15)[name = tensor<string, []>("op_834")];
tensor<int32, [1]> variance_31_axes_0 = const()[name = tensor<string, []>("variance_31_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_31_keep_dims_0 = const()[name = tensor<string, []>("variance_31_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_31 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = var_834)[name = tensor<string, []>("variance_31")];
tensor<fp32, []> var_837 = const()[name = tensor<string, []>("op_837"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_838 = add(x = variance_31, y = var_837)[name = tensor<string, []>("op_838")];
tensor<fp32, []> var_839_epsilon_0 = const()[name = tensor<string, []>("op_839_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_839 = rsqrt(epsilon = var_839_epsilon_0, x = var_838)[name = tensor<string, []>("op_839")];
tensor<fp32, [1, ?, 256]> hidden_states_111 = mul(x = hidden_states_107, y = var_839)[name = tensor<string, []>("hidden_states_111")];
tensor<fp32, [1, ?, 256]> input_141 = mul(x = encoder_block_7_layer_1_layer_norm_weight, y = hidden_states_111)[name = tensor<string, []>("input_141")];
tensor<fp32, [1, ?, 1024]> input_143 = linear(bias = linear_4_bias_0, weight = encoder_block_7_layer_1_DenseReluDense_wi_0_weight, x = input_141)[name = tensor<string, []>("linear_53")];
tensor<string, []> hidden_gelu_15_mode_0 = const()[name = tensor<string, []>("hidden_gelu_15_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp32, [1, ?, 1024]> hidden_gelu_15 = gelu(mode = hidden_gelu_15_mode_0, x = input_143)[name = tensor<string, []>("hidden_gelu_15")];
tensor<fp32, [1, ?, 1024]> hidden_linear_15 = linear(bias = linear_4_bias_0, weight = encoder_block_7_layer_1_DenseReluDense_wi_1_weight, x = input_141)[name = tensor<string, []>("linear_54")];
tensor<fp32, [1, ?, 1024]> input_145 = mul(x = hidden_gelu_15, y = hidden_linear_15)[name = tensor<string, []>("input_145")];
tensor<fp32, [1, ?, 256]> input_149 = linear(bias = linear_3_bias_0, weight = encoder_block_7_layer_1_DenseReluDense_wo_weight, x = input_145)[name = tensor<string, []>("linear_55")];
tensor<fp32, [1, ?, 256]> hidden_states_113 = add(x = hidden_states_107, y = input_149)[name = tensor<string, []>("hidden_states_113")];
tensor<fp32, []> var_17_promoted_16 = const()[name = tensor<string, []>("op_17_promoted_16"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_878 = pow(x = hidden_states_113, y = var_17_promoted_16)[name = tensor<string, []>("op_878")];
tensor<int32, [1]> variance_33_axes_0 = const()[name = tensor<string, []>("variance_33_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_33_keep_dims_0 = const()[name = tensor<string, []>("variance_33_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_33 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = var_878)[name = tensor<string, []>("variance_33")];
tensor<fp32, []> var_881 = const()[name = tensor<string, []>("op_881"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_882 = add(x = variance_33, y = var_881)[name = tensor<string, []>("op_882")];
tensor<fp32, []> var_883_epsilon_0 = const()[name = tensor<string, []>("op_883_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_883 = rsqrt(epsilon = var_883_epsilon_0, x = var_882)[name = tensor<string, []>("op_883")];
tensor<fp32, [1, ?, 256]> hidden_states_117 = mul(x = hidden_states_113, y = var_883)[name = tensor<string, []>("hidden_states_117")];
tensor<fp32, [1, ?, 256]> hidden_states_119 = mul(x = encoder_block_8_layer_0_layer_norm_weight, y = hidden_states_117)[name = tensor<string, []>("hidden_states_119")];
tensor<fp32, [1, ?, 384]> states_65 = linear(bias = linear_0_bias_0, weight = encoder_block_8_layer_0_SelfAttention_q_weight, x = hidden_states_119)[name = tensor<string, []>("linear_56")];
tensor<int32, [4]> var_896 = const()[name = tensor<string, []>("op_896"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_897 = reshape(shape = var_896, x = states_65)[name = tensor<string, []>("op_897")];
tensor<fp32, [1, ?, 384]> states_67 = linear(bias = linear_0_bias_0, weight = encoder_block_8_layer_0_SelfAttention_k_weight, x = hidden_states_119)[name = tensor<string, []>("linear_57")];
tensor<int32, [4]> var_901 = const()[name = tensor<string, []>("op_901"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_902 = reshape(shape = var_901, x = states_67)[name = tensor<string, []>("op_902")];
tensor<fp32, [1, ?, 384]> states_69 = linear(bias = linear_0_bias_0, weight = encoder_block_8_layer_0_SelfAttention_v_weight, x = hidden_states_119)[name = tensor<string, []>("linear_58")];
tensor<int32, [4]> var_906 = const()[name = tensor<string, []>("op_906"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_907 = reshape(shape = var_906, x = states_69)[name = tensor<string, []>("op_907")];
tensor<int32, [4]> value_states_17_perm_0 = const()[name = tensor<string, []>("value_states_17_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> scores_33_transpose_x_0 = const()[name = tensor<string, []>("scores_33_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_33_transpose_y_0 = const()[name = tensor<string, []>("scores_33_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_52_perm_0 = const()[name = tensor<string, []>("transpose_52_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_53_perm_0 = const()[name = tensor<string, []>("transpose_53_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp32, [1, 6, 64, ?]> transpose_53 = transpose(perm = transpose_53_perm_0, x = var_902)[name = tensor<string, []>("transpose_73")];
tensor<fp32, [1, 6, ?, 64]> transpose_52 = transpose(perm = transpose_52_perm_0, x = var_897)[name = tensor<string, []>("transpose_74")];
tensor<fp32, [1, 6, ?, ?]> scores_33 = matmul(transpose_x = scores_33_transpose_x_0, transpose_y = scores_33_transpose_y_0, x = transpose_52, y = transpose_53)[name = tensor<string, []>("scores_33")];
tensor<fp32, [1, 6, ?, ?]> scores_35 = add(x = scores_33, y = position_bias)[name = tensor<string, []>("scores_35")];
tensor<fp32, [1, 6, ?, ?]> var_913 = softmax(axis = var_21, x = scores_35)[name = tensor<string, []>("op_913")];
tensor<bool, []> states_71_transpose_x_0 = const()[name = tensor<string, []>("states_71_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> states_71_transpose_y_0 = const()[name = tensor<string, []>("states_71_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp32, [1, 6, ?, 64]> value_states_17 = transpose(perm = value_states_17_perm_0, x = var_907)[name = tensor<string, []>("transpose_75")];
tensor<fp32, [1, 6, ?, 64]> states_71 = matmul(transpose_x = states_71_transpose_x_0, transpose_y = states_71_transpose_y_0, x = var_913, y = value_states_17)[name = tensor<string, []>("states_71")];
tensor<int32, [4]> var_917_perm_0 = const()[name = tensor<string, []>("op_917_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_919 = const()[name = tensor<string, []>("op_919"), val = tensor<int32, [3]>([1, -1, 384])];
tensor<fp32, [1, ?, 6, 64]> var_917 = transpose(perm = var_917_perm_0, x = states_71)[name = tensor<string, []>("transpose_72")];
tensor<fp32, [1, ?, 384]> input_155 = reshape(shape = var_919, x = var_917)[name = tensor<string, []>("input_155")];
tensor<fp32, [1, ?, 256]> input_157 = linear(bias = linear_3_bias_0, weight = encoder_block_8_layer_0_SelfAttention_o_weight, x = input_155)[name = tensor<string, []>("linear_59")];
tensor<fp32, [1, ?, 256]> hidden_states_121 = add(x = hidden_states_113, y = input_157)[name = tensor<string, []>("hidden_states_121")];
tensor<fp32, []> var_17_promoted_17 = const()[name = tensor<string, []>("op_17_promoted_17"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_929 = pow(x = hidden_states_121, y = var_17_promoted_17)[name = tensor<string, []>("op_929")];
tensor<int32, [1]> variance_35_axes_0 = const()[name = tensor<string, []>("variance_35_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_35_keep_dims_0 = const()[name = tensor<string, []>("variance_35_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_35 = reduce_mean(axes = variance_35_axes_0, keep_dims = variance_35_keep_dims_0, x = var_929)[name = tensor<string, []>("variance_35")];
tensor<fp32, []> var_932 = const()[name = tensor<string, []>("op_932"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_933 = add(x = variance_35, y = var_932)[name = tensor<string, []>("op_933")];
tensor<fp32, []> var_934_epsilon_0 = const()[name = tensor<string, []>("op_934_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_934 = rsqrt(epsilon = var_934_epsilon_0, x = var_933)[name = tensor<string, []>("op_934")];
tensor<fp32, [1, ?, 256]> hidden_states_125 = mul(x = hidden_states_121, y = var_934)[name = tensor<string, []>("hidden_states_125")];
tensor<fp32, [1, ?, 256]> input_159 = mul(x = encoder_block_8_layer_1_layer_norm_weight, y = hidden_states_125)[name = tensor<string, []>("input_159")];
tensor<fp32, [1, ?, 1024]> input_161 = linear(bias = linear_4_bias_0, weight = encoder_block_8_layer_1_DenseReluDense_wi_0_weight, x = input_159)[name = tensor<string, []>("linear_60")];
tensor<string, []> hidden_gelu_17_mode_0 = const()[name = tensor<string, []>("hidden_gelu_17_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp32, [1, ?, 1024]> hidden_gelu_17 = gelu(mode = hidden_gelu_17_mode_0, x = input_161)[name = tensor<string, []>("hidden_gelu_17")];
tensor<fp32, [1, ?, 1024]> hidden_linear_17 = linear(bias = linear_4_bias_0, weight = encoder_block_8_layer_1_DenseReluDense_wi_1_weight, x = input_159)[name = tensor<string, []>("linear_61")];
tensor<fp32, [1, ?, 1024]> input_163 = mul(x = hidden_gelu_17, y = hidden_linear_17)[name = tensor<string, []>("input_163")];
tensor<fp32, [1, ?, 256]> input_167 = linear(bias = linear_3_bias_0, weight = encoder_block_8_layer_1_DenseReluDense_wo_weight, x = input_163)[name = tensor<string, []>("linear_62")];
tensor<fp32, [1, ?, 256]> hidden_states_127 = add(x = hidden_states_121, y = input_167)[name = tensor<string, []>("hidden_states_127")];
tensor<fp32, []> var_17_promoted_18 = const()[name = tensor<string, []>("op_17_promoted_18"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_973 = pow(x = hidden_states_127, y = var_17_promoted_18)[name = tensor<string, []>("op_973")];
tensor<int32, [1]> variance_37_axes_0 = const()[name = tensor<string, []>("variance_37_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_37_keep_dims_0 = const()[name = tensor<string, []>("variance_37_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_37 = reduce_mean(axes = variance_37_axes_0, keep_dims = variance_37_keep_dims_0, x = var_973)[name = tensor<string, []>("variance_37")];
tensor<fp32, []> var_976 = const()[name = tensor<string, []>("op_976"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_977 = add(x = variance_37, y = var_976)[name = tensor<string, []>("op_977")];
tensor<fp32, []> var_978_epsilon_0 = const()[name = tensor<string, []>("op_978_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_978 = rsqrt(epsilon = var_978_epsilon_0, x = var_977)[name = tensor<string, []>("op_978")];
tensor<fp32, [1, ?, 256]> hidden_states_131 = mul(x = hidden_states_127, y = var_978)[name = tensor<string, []>("hidden_states_131")];
tensor<fp32, [1, ?, 256]> hidden_states_133 = mul(x = encoder_block_9_layer_0_layer_norm_weight, y = hidden_states_131)[name = tensor<string, []>("hidden_states_133")];
tensor<fp32, [1, ?, 384]> states_73 = linear(bias = linear_0_bias_0, weight = encoder_block_9_layer_0_SelfAttention_q_weight, x = hidden_states_133)[name = tensor<string, []>("linear_63")];
tensor<int32, [4]> var_991 = const()[name = tensor<string, []>("op_991"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_992 = reshape(shape = var_991, x = states_73)[name = tensor<string, []>("op_992")];
tensor<fp32, [1, ?, 384]> states_75 = linear(bias = linear_0_bias_0, weight = encoder_block_9_layer_0_SelfAttention_k_weight, x = hidden_states_133)[name = tensor<string, []>("linear_64")];
tensor<int32, [4]> var_996 = const()[name = tensor<string, []>("op_996"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_997 = reshape(shape = var_996, x = states_75)[name = tensor<string, []>("op_997")];
tensor<fp32, [1, ?, 384]> states_77 = linear(bias = linear_0_bias_0, weight = encoder_block_9_layer_0_SelfAttention_v_weight, x = hidden_states_133)[name = tensor<string, []>("linear_65")];
tensor<int32, [4]> var_1001 = const()[name = tensor<string, []>("op_1001"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_1002 = reshape(shape = var_1001, x = states_77)[name = tensor<string, []>("op_1002")];
tensor<int32, [4]> value_states_19_perm_0 = const()[name = tensor<string, []>("value_states_19_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> scores_37_transpose_x_0 = const()[name = tensor<string, []>("scores_37_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_37_transpose_y_0 = const()[name = tensor<string, []>("scores_37_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_54_perm_0 = const()[name = tensor<string, []>("transpose_54_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_55_perm_0 = const()[name = tensor<string, []>("transpose_55_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp32, [1, 6, 64, ?]> transpose_55 = transpose(perm = transpose_55_perm_0, x = var_997)[name = tensor<string, []>("transpose_69")];
tensor<fp32, [1, 6, ?, 64]> transpose_54 = transpose(perm = transpose_54_perm_0, x = var_992)[name = tensor<string, []>("transpose_70")];
tensor<fp32, [1, 6, ?, ?]> scores_37 = matmul(transpose_x = scores_37_transpose_x_0, transpose_y = scores_37_transpose_y_0, x = transpose_54, y = transpose_55)[name = tensor<string, []>("scores_37")];
tensor<fp32, [1, 6, ?, ?]> scores_39 = add(x = scores_37, y = position_bias)[name = tensor<string, []>("scores_39")];
tensor<fp32, [1, 6, ?, ?]> var_1008 = softmax(axis = var_21, x = scores_39)[name = tensor<string, []>("op_1008")];
tensor<bool, []> states_79_transpose_x_0 = const()[name = tensor<string, []>("states_79_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> states_79_transpose_y_0 = const()[name = tensor<string, []>("states_79_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp32, [1, 6, ?, 64]> value_states_19 = transpose(perm = value_states_19_perm_0, x = var_1002)[name = tensor<string, []>("transpose_71")];
tensor<fp32, [1, 6, ?, 64]> states_79 = matmul(transpose_x = states_79_transpose_x_0, transpose_y = states_79_transpose_y_0, x = var_1008, y = value_states_19)[name = tensor<string, []>("states_79")];
tensor<int32, [4]> var_1012_perm_0 = const()[name = tensor<string, []>("op_1012_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1014 = const()[name = tensor<string, []>("op_1014"), val = tensor<int32, [3]>([1, -1, 384])];
tensor<fp32, [1, ?, 6, 64]> var_1012 = transpose(perm = var_1012_perm_0, x = states_79)[name = tensor<string, []>("transpose_68")];
tensor<fp32, [1, ?, 384]> input_173 = reshape(shape = var_1014, x = var_1012)[name = tensor<string, []>("input_173")];
tensor<fp32, [1, ?, 256]> input_175 = linear(bias = linear_3_bias_0, weight = encoder_block_9_layer_0_SelfAttention_o_weight, x = input_173)[name = tensor<string, []>("linear_66")];
tensor<fp32, [1, ?, 256]> hidden_states_135 = add(x = hidden_states_127, y = input_175)[name = tensor<string, []>("hidden_states_135")];
tensor<fp32, []> var_17_promoted_19 = const()[name = tensor<string, []>("op_17_promoted_19"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_1024 = pow(x = hidden_states_135, y = var_17_promoted_19)[name = tensor<string, []>("op_1024")];
tensor<int32, [1]> variance_39_axes_0 = const()[name = tensor<string, []>("variance_39_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_39_keep_dims_0 = const()[name = tensor<string, []>("variance_39_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_39 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = var_1024)[name = tensor<string, []>("variance_39")];
tensor<fp32, []> var_1027 = const()[name = tensor<string, []>("op_1027"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_1028 = add(x = variance_39, y = var_1027)[name = tensor<string, []>("op_1028")];
tensor<fp32, []> var_1029_epsilon_0 = const()[name = tensor<string, []>("op_1029_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_1029 = rsqrt(epsilon = var_1029_epsilon_0, x = var_1028)[name = tensor<string, []>("op_1029")];
tensor<fp32, [1, ?, 256]> hidden_states_139 = mul(x = hidden_states_135, y = var_1029)[name = tensor<string, []>("hidden_states_139")];
tensor<fp32, [1, ?, 256]> input_177 = mul(x = encoder_block_9_layer_1_layer_norm_weight, y = hidden_states_139)[name = tensor<string, []>("input_177")];
tensor<fp32, [1, ?, 1024]> input_179 = linear(bias = linear_4_bias_0, weight = encoder_block_9_layer_1_DenseReluDense_wi_0_weight, x = input_177)[name = tensor<string, []>("linear_67")];
tensor<string, []> hidden_gelu_19_mode_0 = const()[name = tensor<string, []>("hidden_gelu_19_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp32, [1, ?, 1024]> hidden_gelu_19 = gelu(mode = hidden_gelu_19_mode_0, x = input_179)[name = tensor<string, []>("hidden_gelu_19")];
tensor<fp32, [1, ?, 1024]> hidden_linear_19 = linear(bias = linear_4_bias_0, weight = encoder_block_9_layer_1_DenseReluDense_wi_1_weight, x = input_177)[name = tensor<string, []>("linear_68")];
tensor<fp32, [1, ?, 1024]> input_181 = mul(x = hidden_gelu_19, y = hidden_linear_19)[name = tensor<string, []>("input_181")];
tensor<fp32, [1, ?, 256]> input_185 = linear(bias = linear_3_bias_0, weight = encoder_block_9_layer_1_DenseReluDense_wo_weight, x = input_181)[name = tensor<string, []>("linear_69")];
tensor<fp32, [1, ?, 256]> hidden_states_141 = add(x = hidden_states_135, y = input_185)[name = tensor<string, []>("hidden_states_141")];
tensor<fp32, []> var_17_promoted_20 = const()[name = tensor<string, []>("op_17_promoted_20"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_1068 = pow(x = hidden_states_141, y = var_17_promoted_20)[name = tensor<string, []>("op_1068")];
tensor<int32, [1]> variance_41_axes_0 = const()[name = tensor<string, []>("variance_41_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_41_keep_dims_0 = const()[name = tensor<string, []>("variance_41_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_41 = reduce_mean(axes = variance_41_axes_0, keep_dims = variance_41_keep_dims_0, x = var_1068)[name = tensor<string, []>("variance_41")];
tensor<fp32, []> var_1071 = const()[name = tensor<string, []>("op_1071"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_1072 = add(x = variance_41, y = var_1071)[name = tensor<string, []>("op_1072")];
tensor<fp32, []> var_1073_epsilon_0 = const()[name = tensor<string, []>("op_1073_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_1073 = rsqrt(epsilon = var_1073_epsilon_0, x = var_1072)[name = tensor<string, []>("op_1073")];
tensor<fp32, [1, ?, 256]> hidden_states_145 = mul(x = hidden_states_141, y = var_1073)[name = tensor<string, []>("hidden_states_145")];
tensor<fp32, [1, ?, 256]> hidden_states_147 = mul(x = encoder_block_10_layer_0_layer_norm_weight, y = hidden_states_145)[name = tensor<string, []>("hidden_states_147")];
tensor<fp32, [1, ?, 384]> states_81 = linear(bias = linear_0_bias_0, weight = encoder_block_10_layer_0_SelfAttention_q_weight, x = hidden_states_147)[name = tensor<string, []>("linear_70")];
tensor<int32, [4]> var_1086 = const()[name = tensor<string, []>("op_1086"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_1087 = reshape(shape = var_1086, x = states_81)[name = tensor<string, []>("op_1087")];
tensor<fp32, [1, ?, 384]> states_83 = linear(bias = linear_0_bias_0, weight = encoder_block_10_layer_0_SelfAttention_k_weight, x = hidden_states_147)[name = tensor<string, []>("linear_71")];
tensor<int32, [4]> var_1091 = const()[name = tensor<string, []>("op_1091"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_1092 = reshape(shape = var_1091, x = states_83)[name = tensor<string, []>("op_1092")];
tensor<fp32, [1, ?, 384]> states_85 = linear(bias = linear_0_bias_0, weight = encoder_block_10_layer_0_SelfAttention_v_weight, x = hidden_states_147)[name = tensor<string, []>("linear_72")];
tensor<int32, [4]> var_1096 = const()[name = tensor<string, []>("op_1096"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_1097 = reshape(shape = var_1096, x = states_85)[name = tensor<string, []>("op_1097")];
tensor<int32, [4]> value_states_21_perm_0 = const()[name = tensor<string, []>("value_states_21_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> scores_41_transpose_x_0 = const()[name = tensor<string, []>("scores_41_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_41_transpose_y_0 = const()[name = tensor<string, []>("scores_41_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_56_perm_0 = const()[name = tensor<string, []>("transpose_56_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_57_perm_0 = const()[name = tensor<string, []>("transpose_57_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp32, [1, 6, 64, ?]> transpose_57 = transpose(perm = transpose_57_perm_0, x = var_1092)[name = tensor<string, []>("transpose_65")];
tensor<fp32, [1, 6, ?, 64]> transpose_56 = transpose(perm = transpose_56_perm_0, x = var_1087)[name = tensor<string, []>("transpose_66")];
tensor<fp32, [1, 6, ?, ?]> scores_41 = matmul(transpose_x = scores_41_transpose_x_0, transpose_y = scores_41_transpose_y_0, x = transpose_56, y = transpose_57)[name = tensor<string, []>("scores_41")];
tensor<fp32, [1, 6, ?, ?]> scores_43 = add(x = scores_41, y = position_bias)[name = tensor<string, []>("scores_43")];
tensor<fp32, [1, 6, ?, ?]> var_1103 = softmax(axis = var_21, x = scores_43)[name = tensor<string, []>("op_1103")];
tensor<bool, []> states_87_transpose_x_0 = const()[name = tensor<string, []>("states_87_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> states_87_transpose_y_0 = const()[name = tensor<string, []>("states_87_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp32, [1, 6, ?, 64]> value_states_21 = transpose(perm = value_states_21_perm_0, x = var_1097)[name = tensor<string, []>("transpose_67")];
tensor<fp32, [1, 6, ?, 64]> states_87 = matmul(transpose_x = states_87_transpose_x_0, transpose_y = states_87_transpose_y_0, x = var_1103, y = value_states_21)[name = tensor<string, []>("states_87")];
tensor<int32, [4]> var_1107_perm_0 = const()[name = tensor<string, []>("op_1107_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1109 = const()[name = tensor<string, []>("op_1109"), val = tensor<int32, [3]>([1, -1, 384])];
tensor<fp32, [1, ?, 6, 64]> var_1107 = transpose(perm = var_1107_perm_0, x = states_87)[name = tensor<string, []>("transpose_64")];
tensor<fp32, [1, ?, 384]> input_191 = reshape(shape = var_1109, x = var_1107)[name = tensor<string, []>("input_191")];
tensor<fp32, [1, ?, 256]> input_193 = linear(bias = linear_3_bias_0, weight = encoder_block_10_layer_0_SelfAttention_o_weight, x = input_191)[name = tensor<string, []>("linear_73")];
tensor<fp32, [1, ?, 256]> hidden_states_149 = add(x = hidden_states_141, y = input_193)[name = tensor<string, []>("hidden_states_149")];
tensor<fp32, []> var_17_promoted_21 = const()[name = tensor<string, []>("op_17_promoted_21"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_1119 = pow(x = hidden_states_149, y = var_17_promoted_21)[name = tensor<string, []>("op_1119")];
tensor<int32, [1]> variance_43_axes_0 = const()[name = tensor<string, []>("variance_43_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_43_keep_dims_0 = const()[name = tensor<string, []>("variance_43_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_43 = reduce_mean(axes = variance_43_axes_0, keep_dims = variance_43_keep_dims_0, x = var_1119)[name = tensor<string, []>("variance_43")];
tensor<fp32, []> var_1122 = const()[name = tensor<string, []>("op_1122"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_1123 = add(x = variance_43, y = var_1122)[name = tensor<string, []>("op_1123")];
tensor<fp32, []> var_1124_epsilon_0 = const()[name = tensor<string, []>("op_1124_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_1124 = rsqrt(epsilon = var_1124_epsilon_0, x = var_1123)[name = tensor<string, []>("op_1124")];
tensor<fp32, [1, ?, 256]> hidden_states_153 = mul(x = hidden_states_149, y = var_1124)[name = tensor<string, []>("hidden_states_153")];
tensor<fp32, [1, ?, 256]> input_195 = mul(x = encoder_block_10_layer_1_layer_norm_weight, y = hidden_states_153)[name = tensor<string, []>("input_195")];
tensor<fp32, [1, ?, 1024]> input_197 = linear(bias = linear_4_bias_0, weight = encoder_block_10_layer_1_DenseReluDense_wi_0_weight, x = input_195)[name = tensor<string, []>("linear_74")];
tensor<string, []> hidden_gelu_21_mode_0 = const()[name = tensor<string, []>("hidden_gelu_21_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp32, [1, ?, 1024]> hidden_gelu_21 = gelu(mode = hidden_gelu_21_mode_0, x = input_197)[name = tensor<string, []>("hidden_gelu_21")];
tensor<fp32, [1, ?, 1024]> hidden_linear_21 = linear(bias = linear_4_bias_0, weight = encoder_block_10_layer_1_DenseReluDense_wi_1_weight, x = input_195)[name = tensor<string, []>("linear_75")];
tensor<fp32, [1, ?, 1024]> input_199 = mul(x = hidden_gelu_21, y = hidden_linear_21)[name = tensor<string, []>("input_199")];
tensor<fp32, [1, ?, 256]> input_203 = linear(bias = linear_3_bias_0, weight = encoder_block_10_layer_1_DenseReluDense_wo_weight, x = input_199)[name = tensor<string, []>("linear_76")];
tensor<fp32, [1, ?, 256]> hidden_states_155 = add(x = hidden_states_149, y = input_203)[name = tensor<string, []>("hidden_states_155")];
tensor<fp32, []> var_17_promoted_22 = const()[name = tensor<string, []>("op_17_promoted_22"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_1163 = pow(x = hidden_states_155, y = var_17_promoted_22)[name = tensor<string, []>("op_1163")];
tensor<int32, [1]> variance_45_axes_0 = const()[name = tensor<string, []>("variance_45_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_45_keep_dims_0 = const()[name = tensor<string, []>("variance_45_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_45 = reduce_mean(axes = variance_45_axes_0, keep_dims = variance_45_keep_dims_0, x = var_1163)[name = tensor<string, []>("variance_45")];
tensor<fp32, []> var_1166 = const()[name = tensor<string, []>("op_1166"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_1167 = add(x = variance_45, y = var_1166)[name = tensor<string, []>("op_1167")];
tensor<fp32, []> var_1168_epsilon_0 = const()[name = tensor<string, []>("op_1168_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_1168 = rsqrt(epsilon = var_1168_epsilon_0, x = var_1167)[name = tensor<string, []>("op_1168")];
tensor<fp32, [1, ?, 256]> hidden_states_159 = mul(x = hidden_states_155, y = var_1168)[name = tensor<string, []>("hidden_states_159")];
tensor<fp32, [1, ?, 256]> hidden_states_161 = mul(x = encoder_block_11_layer_0_layer_norm_weight, y = hidden_states_159)[name = tensor<string, []>("hidden_states_161")];
tensor<fp32, [1, ?, 384]> states_89 = linear(bias = linear_0_bias_0, weight = encoder_block_11_layer_0_SelfAttention_q_weight, x = hidden_states_161)[name = tensor<string, []>("linear_77")];
tensor<int32, [4]> var_1181 = const()[name = tensor<string, []>("op_1181"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_1182 = reshape(shape = var_1181, x = states_89)[name = tensor<string, []>("op_1182")];
tensor<fp32, [1, ?, 384]> states_91 = linear(bias = linear_0_bias_0, weight = encoder_block_11_layer_0_SelfAttention_k_weight, x = hidden_states_161)[name = tensor<string, []>("linear_78")];
tensor<int32, [4]> var_1186 = const()[name = tensor<string, []>("op_1186"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_1187 = reshape(shape = var_1186, x = states_91)[name = tensor<string, []>("op_1187")];
tensor<fp32, [1, ?, 384]> states_93 = linear(bias = linear_0_bias_0, weight = encoder_block_11_layer_0_SelfAttention_v_weight, x = hidden_states_161)[name = tensor<string, []>("linear_79")];
tensor<int32, [4]> var_1191 = const()[name = tensor<string, []>("op_1191"), val = tensor<int32, [4]>([1, -1, 6, 64])];
tensor<fp32, [1, ?, 6, 64]> var_1192 = reshape(shape = var_1191, x = states_93)[name = tensor<string, []>("op_1192")];
tensor<int32, [4]> value_states_perm_0 = const()[name = tensor<string, []>("value_states_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> scores_45_transpose_x_0 = const()[name = tensor<string, []>("scores_45_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_45_transpose_y_0 = const()[name = tensor<string, []>("scores_45_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_58_perm_0 = const()[name = tensor<string, []>("transpose_58_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_59_perm_0 = const()[name = tensor<string, []>("transpose_59_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp32, [1, 6, 64, ?]> transpose_59 = transpose(perm = transpose_59_perm_0, x = var_1187)[name = tensor<string, []>("transpose_61")];
tensor<fp32, [1, 6, ?, 64]> transpose_58 = transpose(perm = transpose_58_perm_0, x = var_1182)[name = tensor<string, []>("transpose_62")];
tensor<fp32, [1, 6, ?, ?]> scores_45 = matmul(transpose_x = scores_45_transpose_x_0, transpose_y = scores_45_transpose_y_0, x = transpose_58, y = transpose_59)[name = tensor<string, []>("scores_45")];
tensor<fp32, [1, 6, ?, ?]> scores = add(x = scores_45, y = position_bias)[name = tensor<string, []>("scores")];
tensor<fp32, [1, 6, ?, ?]> var_1198 = softmax(axis = var_21, x = scores)[name = tensor<string, []>("op_1198")];
tensor<bool, []> states_transpose_x_0 = const()[name = tensor<string, []>("states_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> states_transpose_y_0 = const()[name = tensor<string, []>("states_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp32, [1, 6, ?, 64]> value_states = transpose(perm = value_states_perm_0, x = var_1192)[name = tensor<string, []>("transpose_63")];
tensor<fp32, [1, 6, ?, 64]> states = matmul(transpose_x = states_transpose_x_0, transpose_y = states_transpose_y_0, x = var_1198, y = value_states)[name = tensor<string, []>("states")];
tensor<int32, [4]> var_1202_perm_0 = const()[name = tensor<string, []>("op_1202_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1204 = const()[name = tensor<string, []>("op_1204"), val = tensor<int32, [3]>([1, -1, 384])];
tensor<fp32, [1, ?, 6, 64]> var_1202 = transpose(perm = var_1202_perm_0, x = states)[name = tensor<string, []>("transpose_60")];
tensor<fp32, [1, ?, 384]> input_209 = reshape(shape = var_1204, x = var_1202)[name = tensor<string, []>("input_209")];
tensor<fp32, [1, ?, 256]> input_211 = linear(bias = linear_3_bias_0, weight = encoder_block_11_layer_0_SelfAttention_o_weight, x = input_209)[name = tensor<string, []>("linear_80")];
tensor<fp32, [1, ?, 256]> hidden_states_163 = add(x = hidden_states_155, y = input_211)[name = tensor<string, []>("hidden_states_163")];
tensor<fp32, []> var_17_promoted_23 = const()[name = tensor<string, []>("op_17_promoted_23"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_1214 = pow(x = hidden_states_163, y = var_17_promoted_23)[name = tensor<string, []>("op_1214")];
tensor<int32, [1]> variance_47_axes_0 = const()[name = tensor<string, []>("variance_47_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_47_keep_dims_0 = const()[name = tensor<string, []>("variance_47_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance_47 = reduce_mean(axes = variance_47_axes_0, keep_dims = variance_47_keep_dims_0, x = var_1214)[name = tensor<string, []>("variance_47")];
tensor<fp32, []> var_1217 = const()[name = tensor<string, []>("op_1217"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_1218 = add(x = variance_47, y = var_1217)[name = tensor<string, []>("op_1218")];
tensor<fp32, []> var_1219_epsilon_0 = const()[name = tensor<string, []>("op_1219_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_1219 = rsqrt(epsilon = var_1219_epsilon_0, x = var_1218)[name = tensor<string, []>("op_1219")];
tensor<fp32, [1, ?, 256]> hidden_states_167 = mul(x = hidden_states_163, y = var_1219)[name = tensor<string, []>("hidden_states_167")];
tensor<fp32, [1, ?, 256]> input_213 = mul(x = encoder_block_11_layer_1_layer_norm_weight, y = hidden_states_167)[name = tensor<string, []>("input_213")];
tensor<fp32, [1, ?, 1024]> input_215 = linear(bias = linear_4_bias_0, weight = encoder_block_11_layer_1_DenseReluDense_wi_0_weight, x = input_213)[name = tensor<string, []>("linear_81")];
tensor<string, []> hidden_gelu_mode_0 = const()[name = tensor<string, []>("hidden_gelu_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp32, [1, ?, 1024]> hidden_gelu = gelu(mode = hidden_gelu_mode_0, x = input_215)[name = tensor<string, []>("hidden_gelu")];
tensor<fp32, [1, ?, 1024]> hidden_linear = linear(bias = linear_4_bias_0, weight = encoder_block_11_layer_1_DenseReluDense_wi_1_weight, x = input_213)[name = tensor<string, []>("linear_82")];
tensor<fp32, [1, ?, 1024]> input_217 = mul(x = hidden_gelu, y = hidden_linear)[name = tensor<string, []>("input_217")];
tensor<fp32, [1, ?, 256]> input_221 = linear(bias = linear_3_bias_0, weight = encoder_block_11_layer_1_DenseReluDense_wo_weight, x = input_217)[name = tensor<string, []>("linear_83")];
tensor<fp32, [1, ?, 256]> hidden_states_169 = add(x = hidden_states_163, y = input_221)[name = tensor<string, []>("hidden_states_169")];
tensor<fp32, []> var_17_promoted_24 = const()[name = tensor<string, []>("op_17_promoted_24"), val = tensor<fp32, []>(0x1p+1)];
tensor<fp32, [1, ?, 256]> var_1252 = pow(x = hidden_states_169, y = var_17_promoted_24)[name = tensor<string, []>("op_1252")];
tensor<int32, [1]> variance_axes_0 = const()[name = tensor<string, []>("variance_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<bool, []> variance_keep_dims_0 = const()[name = tensor<string, []>("variance_keep_dims_0"), val = tensor<bool, []>(true)];
tensor<fp32, [1, ?, 1]> variance = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = var_1252)[name = tensor<string, []>("variance")];
tensor<fp32, []> var_1255 = const()[name = tensor<string, []>("op_1255"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
tensor<fp32, [1, ?, 1]> var_1256 = add(x = variance, y = var_1255)[name = tensor<string, []>("op_1256")];
tensor<fp32, []> var_1257_epsilon_0 = const()[name = tensor<string, []>("op_1257_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
tensor<fp32, [1, ?, 1]> var_1257 = rsqrt(epsilon = var_1257_epsilon_0, x = var_1256)[name = tensor<string, []>("op_1257")];
tensor<fp32, [1, ?, 256]> hidden_states = mul(x = hidden_states_169, y = var_1257)[name = tensor<string, []>("hidden_states")];
tensor<fp32, [1, ?, 256]> last_hidden_state = mul(x = encoder_final_layer_norm_weight, y = hidden_states)[name = tensor<string, []>("input")];
} -> (last_hidden_state);
} |