program(1.0) [buildInfo = dict, tensor>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.7.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})] { func main(tensor attention_mask, tensor input_ids) [FlexibleShapeInformation = tuple, dict, tensor>>, tuple, dict, list, ?>>>>((("DefaultShapes", {{"attention_mask", [1, 16]}, {"input_ids", [1, 16]}}), ("RangeDims", {{"attention_mask", [[1, 1], [1, 64]]}, {"input_ids", [[1, 1], [1, 64]]}})))] { tensor encoder_embed_tokens_weight = const()[name = tensor("encoder_embed_tokens_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; tensor encoder_block_0_layer_0_layer_norm_weight = const()[name = tensor("encoder_block_0_layer_0_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(393344)))]; tensor encoder_block_0_layer_0_SelfAttention_q_weight = const()[name = tensor("encoder_block_0_layer_0_SelfAttention_q_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(394432)))]; tensor encoder_block_0_layer_0_SelfAttention_k_weight = const()[name = tensor("encoder_block_0_layer_0_SelfAttention_k_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(787712)))]; tensor encoder_block_0_layer_0_SelfAttention_v_weight = const()[name = tensor("encoder_block_0_layer_0_SelfAttention_v_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1180992)))]; tensor encoder_block_0_layer_0_SelfAttention_relative_attention_bias_weight = const()[name = tensor("encoder_block_0_layer_0_SelfAttention_relative_attention_bias_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1574272)))]; tensor encoder_block_0_layer_0_SelfAttention_o_weight = const()[name = tensor("encoder_block_0_layer_0_SelfAttention_o_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1575104)))]; tensor encoder_block_0_layer_1_layer_norm_weight = const()[name = tensor("encoder_block_0_layer_1_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1968384)))]; tensor encoder_block_0_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor("encoder_block_0_layer_1_DenseReluDense_wi_0_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1969472)))]; tensor encoder_block_0_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor("encoder_block_0_layer_1_DenseReluDense_wi_1_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3018112)))]; tensor encoder_block_0_layer_1_DenseReluDense_wo_weight = const()[name = tensor("encoder_block_0_layer_1_DenseReluDense_wo_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4066752)))]; tensor encoder_block_1_layer_0_layer_norm_weight = const()[name = tensor("encoder_block_1_layer_0_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5115392)))]; tensor encoder_block_1_layer_0_SelfAttention_q_weight = const()[name = tensor("encoder_block_1_layer_0_SelfAttention_q_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5116480)))]; tensor encoder_block_1_layer_0_SelfAttention_k_weight = const()[name = tensor("encoder_block_1_layer_0_SelfAttention_k_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5509760)))]; tensor encoder_block_1_layer_0_SelfAttention_v_weight = const()[name = tensor("encoder_block_1_layer_0_SelfAttention_v_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5903040)))]; tensor encoder_block_1_layer_0_SelfAttention_o_weight = const()[name = tensor("encoder_block_1_layer_0_SelfAttention_o_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6296320)))]; tensor encoder_block_1_layer_1_layer_norm_weight = const()[name = tensor("encoder_block_1_layer_1_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6689600)))]; tensor encoder_block_1_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor("encoder_block_1_layer_1_DenseReluDense_wi_0_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6690688)))]; tensor encoder_block_1_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor("encoder_block_1_layer_1_DenseReluDense_wi_1_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7739328)))]; tensor encoder_block_1_layer_1_DenseReluDense_wo_weight = const()[name = tensor("encoder_block_1_layer_1_DenseReluDense_wo_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8787968)))]; tensor encoder_block_2_layer_0_layer_norm_weight = const()[name = tensor("encoder_block_2_layer_0_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9836608)))]; tensor encoder_block_2_layer_0_SelfAttention_q_weight = const()[name = tensor("encoder_block_2_layer_0_SelfAttention_q_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9837696)))]; tensor encoder_block_2_layer_0_SelfAttention_k_weight = const()[name = tensor("encoder_block_2_layer_0_SelfAttention_k_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10230976)))]; tensor encoder_block_2_layer_0_SelfAttention_v_weight = const()[name = tensor("encoder_block_2_layer_0_SelfAttention_v_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10624256)))]; tensor encoder_block_2_layer_0_SelfAttention_o_weight = const()[name = tensor("encoder_block_2_layer_0_SelfAttention_o_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11017536)))]; tensor encoder_block_2_layer_1_layer_norm_weight = const()[name = tensor("encoder_block_2_layer_1_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11410816)))]; tensor encoder_block_2_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor("encoder_block_2_layer_1_DenseReluDense_wi_0_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11411904)))]; tensor encoder_block_2_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor("encoder_block_2_layer_1_DenseReluDense_wi_1_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12460544)))]; tensor encoder_block_2_layer_1_DenseReluDense_wo_weight = const()[name = tensor("encoder_block_2_layer_1_DenseReluDense_wo_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13509184)))]; tensor encoder_block_3_layer_0_layer_norm_weight = const()[name = tensor("encoder_block_3_layer_0_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14557824)))]; tensor encoder_block_3_layer_0_SelfAttention_q_weight = const()[name = tensor("encoder_block_3_layer_0_SelfAttention_q_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14558912)))]; tensor encoder_block_3_layer_0_SelfAttention_k_weight = const()[name = tensor("encoder_block_3_layer_0_SelfAttention_k_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14952192)))]; tensor encoder_block_3_layer_0_SelfAttention_v_weight = const()[name = tensor("encoder_block_3_layer_0_SelfAttention_v_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15345472)))]; tensor encoder_block_3_layer_0_SelfAttention_o_weight = const()[name = tensor("encoder_block_3_layer_0_SelfAttention_o_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15738752)))]; tensor encoder_block_3_layer_1_layer_norm_weight = const()[name = tensor("encoder_block_3_layer_1_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16132032)))]; tensor encoder_block_3_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor("encoder_block_3_layer_1_DenseReluDense_wi_0_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16133120)))]; tensor encoder_block_3_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor("encoder_block_3_layer_1_DenseReluDense_wi_1_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17181760)))]; tensor encoder_block_3_layer_1_DenseReluDense_wo_weight = const()[name = tensor("encoder_block_3_layer_1_DenseReluDense_wo_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18230400)))]; tensor encoder_block_4_layer_0_layer_norm_weight = const()[name = tensor("encoder_block_4_layer_0_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19279040)))]; tensor encoder_block_4_layer_0_SelfAttention_q_weight = const()[name = tensor("encoder_block_4_layer_0_SelfAttention_q_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19280128)))]; tensor encoder_block_4_layer_0_SelfAttention_k_weight = const()[name = tensor("encoder_block_4_layer_0_SelfAttention_k_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19673408)))]; tensor encoder_block_4_layer_0_SelfAttention_v_weight = const()[name = tensor("encoder_block_4_layer_0_SelfAttention_v_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20066688)))]; tensor encoder_block_4_layer_0_SelfAttention_o_weight = const()[name = tensor("encoder_block_4_layer_0_SelfAttention_o_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20459968)))]; tensor encoder_block_4_layer_1_layer_norm_weight = const()[name = tensor("encoder_block_4_layer_1_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20853248)))]; tensor encoder_block_4_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor("encoder_block_4_layer_1_DenseReluDense_wi_0_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20854336)))]; tensor encoder_block_4_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor("encoder_block_4_layer_1_DenseReluDense_wi_1_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21902976)))]; tensor encoder_block_4_layer_1_DenseReluDense_wo_weight = const()[name = tensor("encoder_block_4_layer_1_DenseReluDense_wo_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22951616)))]; tensor encoder_block_5_layer_0_layer_norm_weight = const()[name = tensor("encoder_block_5_layer_0_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24000256)))]; tensor encoder_block_5_layer_0_SelfAttention_q_weight = const()[name = tensor("encoder_block_5_layer_0_SelfAttention_q_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24001344)))]; tensor encoder_block_5_layer_0_SelfAttention_k_weight = const()[name = tensor("encoder_block_5_layer_0_SelfAttention_k_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24394624)))]; tensor encoder_block_5_layer_0_SelfAttention_v_weight = const()[name = tensor("encoder_block_5_layer_0_SelfAttention_v_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24787904)))]; tensor encoder_block_5_layer_0_SelfAttention_o_weight = const()[name = tensor("encoder_block_5_layer_0_SelfAttention_o_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25181184)))]; tensor encoder_block_5_layer_1_layer_norm_weight = const()[name = tensor("encoder_block_5_layer_1_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25574464)))]; tensor encoder_block_5_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor("encoder_block_5_layer_1_DenseReluDense_wi_0_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25575552)))]; tensor encoder_block_5_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor("encoder_block_5_layer_1_DenseReluDense_wi_1_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26624192)))]; tensor encoder_block_5_layer_1_DenseReluDense_wo_weight = const()[name = tensor("encoder_block_5_layer_1_DenseReluDense_wo_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(27672832)))]; tensor encoder_block_6_layer_0_layer_norm_weight = const()[name = tensor("encoder_block_6_layer_0_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28721472)))]; tensor encoder_block_6_layer_0_SelfAttention_q_weight = const()[name = tensor("encoder_block_6_layer_0_SelfAttention_q_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28722560)))]; tensor encoder_block_6_layer_0_SelfAttention_k_weight = const()[name = tensor("encoder_block_6_layer_0_SelfAttention_k_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29115840)))]; tensor encoder_block_6_layer_0_SelfAttention_v_weight = const()[name = tensor("encoder_block_6_layer_0_SelfAttention_v_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29509120)))]; tensor encoder_block_6_layer_0_SelfAttention_o_weight = const()[name = tensor("encoder_block_6_layer_0_SelfAttention_o_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29902400)))]; tensor encoder_block_6_layer_1_layer_norm_weight = const()[name = tensor("encoder_block_6_layer_1_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30295680)))]; tensor encoder_block_6_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor("encoder_block_6_layer_1_DenseReluDense_wi_0_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30296768)))]; tensor encoder_block_6_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor("encoder_block_6_layer_1_DenseReluDense_wi_1_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31345408)))]; tensor encoder_block_6_layer_1_DenseReluDense_wo_weight = const()[name = tensor("encoder_block_6_layer_1_DenseReluDense_wo_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32394048)))]; tensor encoder_block_7_layer_0_layer_norm_weight = const()[name = tensor("encoder_block_7_layer_0_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33442688)))]; tensor encoder_block_7_layer_0_SelfAttention_q_weight = const()[name = tensor("encoder_block_7_layer_0_SelfAttention_q_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33443776)))]; tensor encoder_block_7_layer_0_SelfAttention_k_weight = const()[name = tensor("encoder_block_7_layer_0_SelfAttention_k_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33837056)))]; tensor encoder_block_7_layer_0_SelfAttention_v_weight = const()[name = tensor("encoder_block_7_layer_0_SelfAttention_v_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34230336)))]; tensor encoder_block_7_layer_0_SelfAttention_o_weight = const()[name = tensor("encoder_block_7_layer_0_SelfAttention_o_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34623616)))]; tensor encoder_block_7_layer_1_layer_norm_weight = const()[name = tensor("encoder_block_7_layer_1_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35016896)))]; tensor encoder_block_7_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor("encoder_block_7_layer_1_DenseReluDense_wi_0_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35017984)))]; tensor encoder_block_7_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor("encoder_block_7_layer_1_DenseReluDense_wi_1_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36066624)))]; tensor encoder_block_7_layer_1_DenseReluDense_wo_weight = const()[name = tensor("encoder_block_7_layer_1_DenseReluDense_wo_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37115264)))]; tensor encoder_block_8_layer_0_layer_norm_weight = const()[name = tensor("encoder_block_8_layer_0_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38163904)))]; tensor encoder_block_8_layer_0_SelfAttention_q_weight = const()[name = tensor("encoder_block_8_layer_0_SelfAttention_q_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38164992)))]; tensor encoder_block_8_layer_0_SelfAttention_k_weight = const()[name = tensor("encoder_block_8_layer_0_SelfAttention_k_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38558272)))]; tensor encoder_block_8_layer_0_SelfAttention_v_weight = const()[name = tensor("encoder_block_8_layer_0_SelfAttention_v_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38951552)))]; tensor encoder_block_8_layer_0_SelfAttention_o_weight = const()[name = tensor("encoder_block_8_layer_0_SelfAttention_o_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39344832)))]; tensor encoder_block_8_layer_1_layer_norm_weight = const()[name = tensor("encoder_block_8_layer_1_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39738112)))]; tensor encoder_block_8_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor("encoder_block_8_layer_1_DenseReluDense_wi_0_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39739200)))]; tensor encoder_block_8_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor("encoder_block_8_layer_1_DenseReluDense_wi_1_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40787840)))]; tensor encoder_block_8_layer_1_DenseReluDense_wo_weight = const()[name = tensor("encoder_block_8_layer_1_DenseReluDense_wo_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41836480)))]; tensor encoder_block_9_layer_0_layer_norm_weight = const()[name = tensor("encoder_block_9_layer_0_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42885120)))]; tensor encoder_block_9_layer_0_SelfAttention_q_weight = const()[name = tensor("encoder_block_9_layer_0_SelfAttention_q_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42886208)))]; tensor encoder_block_9_layer_0_SelfAttention_k_weight = const()[name = tensor("encoder_block_9_layer_0_SelfAttention_k_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43279488)))]; tensor encoder_block_9_layer_0_SelfAttention_v_weight = const()[name = tensor("encoder_block_9_layer_0_SelfAttention_v_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43672768)))]; tensor encoder_block_9_layer_0_SelfAttention_o_weight = const()[name = tensor("encoder_block_9_layer_0_SelfAttention_o_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44066048)))]; tensor encoder_block_9_layer_1_layer_norm_weight = const()[name = tensor("encoder_block_9_layer_1_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44459328)))]; tensor encoder_block_9_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor("encoder_block_9_layer_1_DenseReluDense_wi_0_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44460416)))]; tensor encoder_block_9_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor("encoder_block_9_layer_1_DenseReluDense_wi_1_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45509056)))]; tensor encoder_block_9_layer_1_DenseReluDense_wo_weight = const()[name = tensor("encoder_block_9_layer_1_DenseReluDense_wo_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46557696)))]; tensor encoder_block_10_layer_0_layer_norm_weight = const()[name = tensor("encoder_block_10_layer_0_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47606336)))]; tensor encoder_block_10_layer_0_SelfAttention_q_weight = const()[name = tensor("encoder_block_10_layer_0_SelfAttention_q_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47607424)))]; tensor encoder_block_10_layer_0_SelfAttention_k_weight = const()[name = tensor("encoder_block_10_layer_0_SelfAttention_k_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48000704)))]; tensor encoder_block_10_layer_0_SelfAttention_v_weight = const()[name = tensor("encoder_block_10_layer_0_SelfAttention_v_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48393984)))]; tensor encoder_block_10_layer_0_SelfAttention_o_weight = const()[name = tensor("encoder_block_10_layer_0_SelfAttention_o_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48787264)))]; tensor encoder_block_10_layer_1_layer_norm_weight = const()[name = tensor("encoder_block_10_layer_1_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49180544)))]; tensor encoder_block_10_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor("encoder_block_10_layer_1_DenseReluDense_wi_0_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49181632)))]; tensor encoder_block_10_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor("encoder_block_10_layer_1_DenseReluDense_wi_1_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50230272)))]; tensor encoder_block_10_layer_1_DenseReluDense_wo_weight = const()[name = tensor("encoder_block_10_layer_1_DenseReluDense_wo_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51278912)))]; tensor encoder_block_11_layer_0_layer_norm_weight = const()[name = tensor("encoder_block_11_layer_0_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52327552)))]; tensor encoder_block_11_layer_0_SelfAttention_q_weight = const()[name = tensor("encoder_block_11_layer_0_SelfAttention_q_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52328640)))]; tensor encoder_block_11_layer_0_SelfAttention_k_weight = const()[name = tensor("encoder_block_11_layer_0_SelfAttention_k_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52721920)))]; tensor encoder_block_11_layer_0_SelfAttention_v_weight = const()[name = tensor("encoder_block_11_layer_0_SelfAttention_v_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53115200)))]; tensor encoder_block_11_layer_0_SelfAttention_o_weight = const()[name = tensor("encoder_block_11_layer_0_SelfAttention_o_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53508480)))]; tensor encoder_block_11_layer_1_layer_norm_weight = const()[name = tensor("encoder_block_11_layer_1_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53901760)))]; tensor encoder_block_11_layer_1_DenseReluDense_wi_0_weight = const()[name = tensor("encoder_block_11_layer_1_DenseReluDense_wi_0_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53902848)))]; tensor encoder_block_11_layer_1_DenseReluDense_wi_1_weight = const()[name = tensor("encoder_block_11_layer_1_DenseReluDense_wi_1_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54951488)))]; tensor encoder_block_11_layer_1_DenseReluDense_wo_weight = const()[name = tensor("encoder_block_11_layer_1_DenseReluDense_wo_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56000128)))]; tensor encoder_final_layer_norm_weight = const()[name = tensor("encoder_final_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57048768)))]; tensor var_7 = const()[name = tensor("op_7"), val = tensor(8)]; tensor var_13 = const()[name = tensor("op_13"), val = tensor(0x1p+0)]; tensor var_19 = const()[name = tensor("op_19"), val = tensor(0)]; tensor var_21 = const()[name = tensor("op_21"), val = tensor(-1)]; tensor input_3_batch_dims_0 = const()[name = tensor("input_3_batch_dims_0"), val = tensor(0)]; tensor input_3_validate_indices_0 = const()[name = tensor("input_3_validate_indices_0"), val = tensor(false)]; tensor greater_equal_1_y_0 = const()[name = tensor("greater_equal_1_y_0"), val = tensor(0)]; tensor greater_equal_1 = greater_equal(x = input_ids, y = greater_equal_1_y_0)[name = tensor("greater_equal_1")]; tensor slice_by_index_1 = const()[name = tensor("slice_by_index_1"), val = tensor(384)]; tensor add_1 = add(x = input_ids, y = slice_by_index_1)[name = tensor("add_1")]; tensor select_1 = select(a = input_ids, b = add_1, cond = greater_equal_1)[name = tensor("select_1")]; tensor input_3_axis_1 = const()[name = tensor("input_3_axis_1"), val = tensor(0)]; tensor input_3 = gather(axis = input_3_axis_1, batch_dims = input_3_batch_dims_0, indices = select_1, validate_indices = input_3_validate_indices_0, x = encoder_embed_tokens_weight)[name = tensor("input_3")]; tensor var_55_axes_0 = const()[name = tensor("op_55_axes_0"), val = tensor([1])]; tensor var_55 = expand_dims(axes = var_55_axes_0, x = attention_mask)[name = tensor("op_55")]; tensor var_56_axes_0 = const()[name = tensor("op_56_axes_0"), val = tensor([2])]; tensor var_56 = expand_dims(axes = var_56_axes_0, x = var_55)[name = tensor("op_56")]; tensor var_58_dtype_0 = const()[name = tensor("op_58_dtype_0"), val = tensor("fp32")]; tensor var_58 = cast(dtype = var_58_dtype_0, x = var_56)[name = tensor("cast_58")]; tensor var_59 = sub(x = var_13, y = var_58)[name = tensor("op_59")]; tensor var_60 = const()[name = tensor("op_60"), val = tensor(-0x1.fffffep+127)]; tensor mask = mul(x = var_59, y = var_60)[name = tensor("mask")]; tensor var_17_promoted = const()[name = tensor("op_17_promoted"), val = tensor(0x1p+1)]; tensor var_71 = pow(x = input_3, y = var_17_promoted)[name = tensor("op_71")]; tensor variance_1_axes_0 = const()[name = tensor("variance_1_axes_0"), val = tensor([-1])]; tensor variance_1_keep_dims_0 = const()[name = tensor("variance_1_keep_dims_0"), val = tensor(true)]; tensor variance_1 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = var_71)[name = tensor("variance_1")]; tensor var_74 = const()[name = tensor("op_74"), val = tensor(0x1.0c6f7ap-20)]; tensor var_75 = add(x = variance_1, y = var_74)[name = tensor("op_75")]; tensor var_76_epsilon_0 = const()[name = tensor("op_76_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_76 = rsqrt(epsilon = var_76_epsilon_0, x = var_75)[name = tensor("op_76")]; tensor hidden_states_5 = mul(x = input_3, y = var_76)[name = tensor("hidden_states_5")]; tensor hidden_states_7 = mul(x = encoder_block_0_layer_0_layer_norm_weight, y = hidden_states_5)[name = tensor("hidden_states_7")]; tensor var_88_shape = shape(x = hidden_states_7)[name = tensor("op_88_shape")]; tensor gather_2_batch_dims_0 = const()[name = tensor("gather_2_batch_dims_0"), val = tensor(0)]; tensor gather_2_validate_indices_0 = const()[name = tensor("gather_2_validate_indices_0"), val = tensor(false)]; tensor select_2 = const()[name = tensor("select_2"), val = tensor(1)]; tensor gather_2_axis_1 = const()[name = tensor("gather_2_axis_1"), val = tensor(0)]; tensor gather_2 = gather(axis = gather_2_axis_1, batch_dims = gather_2_batch_dims_0, indices = select_2, validate_indices = gather_2_validate_indices_0, x = var_88_shape)[name = tensor("gather_2")]; tensor linear_0_bias_0 = const()[name = tensor("linear_0_bias_0"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57049856)))]; tensor states_1 = linear(bias = linear_0_bias_0, weight = encoder_block_0_layer_0_SelfAttention_q_weight, x = hidden_states_7)[name = tensor("linear_0")]; tensor var_91 = const()[name = tensor("op_91"), val = tensor([1, -1, 6, 64])]; tensor var_92 = reshape(shape = var_91, x = states_1)[name = tensor("op_92")]; tensor states_3 = linear(bias = linear_0_bias_0, weight = encoder_block_0_layer_0_SelfAttention_k_weight, x = hidden_states_7)[name = tensor("linear_1")]; tensor var_96 = const()[name = tensor("op_96"), val = tensor([1, -1, 6, 64])]; tensor var_97 = reshape(shape = var_96, x = states_3)[name = tensor("op_97")]; tensor states_5 = linear(bias = linear_0_bias_0, weight = encoder_block_0_layer_0_SelfAttention_v_weight, x = hidden_states_7)[name = tensor("linear_2")]; tensor var_101 = const()[name = tensor("op_101"), val = tensor([1, -1, 6, 64])]; tensor var_102 = reshape(shape = var_101, x = states_5)[name = tensor("op_102")]; tensor value_states_1_perm_0 = const()[name = tensor("value_states_1_perm_0"), val = tensor([0, 2, 1, 3])]; tensor scores_1_transpose_x_0 = const()[name = tensor("scores_1_transpose_x_0"), val = tensor(false)]; tensor scores_1_transpose_y_0 = const()[name = tensor("scores_1_transpose_y_0"), val = tensor(false)]; tensor transpose_36_perm_0 = const()[name = tensor("transpose_36_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_37_perm_0 = const()[name = tensor("transpose_37_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_37 = transpose(perm = transpose_37_perm_0, x = var_97)[name = tensor("transpose_106")]; tensor transpose_36 = transpose(perm = transpose_36_perm_0, x = var_92)[name = tensor("transpose_107")]; tensor scores_1 = matmul(transpose_x = scores_1_transpose_x_0, transpose_y = scores_1_transpose_y_0, x = transpose_36, y = transpose_37)[name = tensor("scores_1")]; tensor const_0 = const()[name = tensor("const_0"), val = tensor(0)]; tensor const_1 = const()[name = tensor("const_1"), val = tensor(1)]; tensor var_106 = range_1d(end = gather_2, start = const_0, step = const_1)[name = tensor("op_106")]; tensor context_position_axes_0 = const()[name = tensor("context_position_axes_0"), val = tensor([1])]; tensor context_position = expand_dims(axes = context_position_axes_0, x = var_106)[name = tensor("context_position")]; tensor var_110_axes_0 = const()[name = tensor("op_110_axes_0"), val = tensor([0])]; tensor var_110 = expand_dims(axes = var_110_axes_0, x = var_106)[name = tensor("op_110")]; tensor relative_position_1 = sub(x = var_110, y = context_position)[name = tensor("relative_position_1")]; tensor var_113 = greater(x = relative_position_1, y = var_19)[name = tensor("op_113")]; tensor var_114_dtype_0 = const()[name = tensor("op_114_dtype_0"), val = tensor("int32")]; tensor var_115 = const()[name = tensor("op_115"), val = tensor(16)]; tensor var_114 = cast(dtype = var_114_dtype_0, x = var_113)[name = tensor("cast_57")]; tensor var_116 = mul(x = var_114, y = var_115)[name = tensor("op_116")]; tensor relative_position = abs(x = relative_position_1)[name = tensor("relative_position")]; tensor is_small = less(x = relative_position, y = var_7)[name = tensor("is_small")]; tensor var_121_dtype_0 = const()[name = tensor("op_121_dtype_0"), val = tensor("fp32")]; tensor _inversed_123_y_0 = const()[name = tensor("_inversed_123_y_0"), val = tensor(0x1p-3)]; tensor var_121 = cast(dtype = var_121_dtype_0, x = relative_position)[name = tensor("cast_56")]; tensor _inversed_123 = mul(x = var_121, y = _inversed_123_y_0)[name = tensor("_inversed_123")]; tensor var_124_epsilon_0 = const()[name = tensor("op_124_epsilon_0"), val = tensor(0x1p-149)]; tensor var_124 = log(epsilon = var_124_epsilon_0, x = _inversed_123)[name = tensor("op_124")]; tensor _inversed_126_y_0 = const()[name = tensor("_inversed_126_y_0"), val = tensor(0x1.715476p-2)]; tensor _inversed_126 = mul(x = var_124, y = _inversed_126_y_0)[name = tensor("_inversed_126")]; tensor var_127_promoted = const()[name = tensor("op_127_promoted"), val = tensor(0x1p+3)]; tensor var_128 = mul(x = _inversed_126, y = var_127_promoted)[name = tensor("op_128")]; tensor var_129_dtype_0 = const()[name = tensor("op_129_dtype_0"), val = tensor("int32")]; tensor var_130 = const()[name = tensor("op_130"), val = tensor(8)]; tensor var_129 = cast(dtype = var_129_dtype_0, x = var_128)[name = tensor("cast_55")]; tensor relative_position_if_large_1 = add(x = var_129, y = var_130)[name = tensor("relative_position_if_large_1")]; tensor var_132_value_0 = const()[name = tensor("op_132_value_0"), val = tensor(15)]; tensor var_132 = fill_like(ref_tensor = relative_position_if_large_1, value = var_132_value_0)[name = tensor("op_132")]; tensor relative_position_if_large = minimum(x = relative_position_if_large_1, y = var_132)[name = tensor("relative_position_if_large")]; tensor var_134 = select(a = relative_position, b = relative_position_if_large, cond = is_small)[name = tensor("op_134")]; tensor input_5 = add(x = var_116, y = var_134)[name = tensor("input_5")]; tensor values_batch_dims_0 = const()[name = tensor("values_batch_dims_0"), val = tensor(0)]; tensor values_validate_indices_0 = const()[name = tensor("values_validate_indices_0"), val = tensor(false)]; tensor greater_equal_3_y_0 = const()[name = tensor("greater_equal_3_y_0"), val = tensor(0)]; tensor greater_equal_3 = greater_equal(x = input_5, y = greater_equal_3_y_0)[name = tensor("greater_equal_3")]; tensor slice_by_index_3 = const()[name = tensor("slice_by_index_3"), val = tensor(32)]; tensor add_3 = add(x = input_5, y = slice_by_index_3)[name = tensor("add_3")]; tensor select_3 = select(a = input_5, b = add_3, cond = greater_equal_3)[name = tensor("select_3")]; tensor values_axis_1 = const()[name = tensor("values_axis_1"), val = tensor(0)]; tensor values = gather(axis = values_axis_1, batch_dims = values_batch_dims_0, indices = select_3, validate_indices = values_validate_indices_0, x = encoder_block_0_layer_0_SelfAttention_relative_attention_bias_weight)[name = tensor("values")]; tensor var_138 = const()[name = tensor("op_138"), val = tensor([2, 0, 1])]; tensor position_bias_1_axes_0 = const()[name = tensor("position_bias_1_axes_0"), val = tensor([0])]; tensor var_139 = transpose(perm = var_138, x = values)[name = tensor("transpose_105")]; tensor position_bias_1 = expand_dims(axes = position_bias_1_axes_0, x = var_139)[name = tensor("position_bias_1")]; tensor position_bias = add(x = position_bias_1, y = mask)[name = tensor("position_bias")]; tensor scores_3 = add(x = scores_1, y = position_bias)[name = tensor("scores_3")]; tensor var_144 = softmax(axis = var_21, x = scores_3)[name = tensor("op_144")]; tensor states_7_transpose_x_0 = const()[name = tensor("states_7_transpose_x_0"), val = tensor(false)]; tensor states_7_transpose_y_0 = const()[name = tensor("states_7_transpose_y_0"), val = tensor(false)]; tensor value_states_1 = transpose(perm = value_states_1_perm_0, x = var_102)[name = tensor("transpose_108")]; tensor states_7 = matmul(transpose_x = states_7_transpose_x_0, transpose_y = states_7_transpose_y_0, x = var_144, y = value_states_1)[name = tensor("states_7")]; tensor var_148_perm_0 = const()[name = tensor("op_148_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_150 = const()[name = tensor("op_150"), val = tensor([1, -1, 384])]; tensor var_148 = transpose(perm = var_148_perm_0, x = states_7)[name = tensor("transpose_104")]; tensor input_11 = reshape(shape = var_150, x = var_148)[name = tensor("input_11")]; tensor linear_3_bias_0 = const()[name = tensor("linear_3_bias_0"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57051456)))]; tensor input_13 = linear(bias = linear_3_bias_0, weight = encoder_block_0_layer_0_SelfAttention_o_weight, x = input_11)[name = tensor("linear_3")]; tensor hidden_states_9 = add(x = input_3, y = input_13)[name = tensor("hidden_states_9")]; tensor var_17_promoted_1 = const()[name = tensor("op_17_promoted_1"), val = tensor(0x1p+1)]; tensor var_166 = pow(x = hidden_states_9, y = var_17_promoted_1)[name = tensor("op_166")]; tensor variance_3_axes_0 = const()[name = tensor("variance_3_axes_0"), val = tensor([-1])]; tensor variance_3_keep_dims_0 = const()[name = tensor("variance_3_keep_dims_0"), val = tensor(true)]; tensor variance_3 = reduce_mean(axes = variance_3_axes_0, keep_dims = variance_3_keep_dims_0, x = var_166)[name = tensor("variance_3")]; tensor var_169 = const()[name = tensor("op_169"), val = tensor(0x1.0c6f7ap-20)]; tensor var_170 = add(x = variance_3, y = var_169)[name = tensor("op_170")]; tensor var_171_epsilon_0 = const()[name = tensor("op_171_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_171 = rsqrt(epsilon = var_171_epsilon_0, x = var_170)[name = tensor("op_171")]; tensor hidden_states_13 = mul(x = hidden_states_9, y = var_171)[name = tensor("hidden_states_13")]; tensor input_15 = mul(x = encoder_block_0_layer_1_layer_norm_weight, y = hidden_states_13)[name = tensor("input_15")]; tensor linear_4_bias_0 = const()[name = tensor("linear_4_bias_0"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57052544)))]; tensor input_17 = linear(bias = linear_4_bias_0, weight = encoder_block_0_layer_1_DenseReluDense_wi_0_weight, x = input_15)[name = tensor("linear_4")]; tensor hidden_gelu_1_mode_0 = const()[name = tensor("hidden_gelu_1_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor hidden_gelu_1 = gelu(mode = hidden_gelu_1_mode_0, x = input_17)[name = tensor("hidden_gelu_1")]; tensor hidden_linear_1 = linear(bias = linear_4_bias_0, weight = encoder_block_0_layer_1_DenseReluDense_wi_1_weight, x = input_15)[name = tensor("linear_5")]; tensor input_19 = mul(x = hidden_gelu_1, y = hidden_linear_1)[name = tensor("input_19")]; tensor input_23 = linear(bias = linear_3_bias_0, weight = encoder_block_0_layer_1_DenseReluDense_wo_weight, x = input_19)[name = tensor("linear_6")]; tensor hidden_states_15 = add(x = hidden_states_9, y = input_23)[name = tensor("hidden_states_15")]; tensor var_17_promoted_2 = const()[name = tensor("op_17_promoted_2"), val = tensor(0x1p+1)]; tensor var_213 = pow(x = hidden_states_15, y = var_17_promoted_2)[name = tensor("op_213")]; tensor variance_5_axes_0 = const()[name = tensor("variance_5_axes_0"), val = tensor([-1])]; tensor variance_5_keep_dims_0 = const()[name = tensor("variance_5_keep_dims_0"), val = tensor(true)]; tensor variance_5 = reduce_mean(axes = variance_5_axes_0, keep_dims = variance_5_keep_dims_0, x = var_213)[name = tensor("variance_5")]; tensor var_216 = const()[name = tensor("op_216"), val = tensor(0x1.0c6f7ap-20)]; tensor var_217 = add(x = variance_5, y = var_216)[name = tensor("op_217")]; tensor var_218_epsilon_0 = const()[name = tensor("op_218_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_218 = rsqrt(epsilon = var_218_epsilon_0, x = var_217)[name = tensor("op_218")]; tensor hidden_states_19 = mul(x = hidden_states_15, y = var_218)[name = tensor("hidden_states_19")]; tensor hidden_states_21 = mul(x = encoder_block_1_layer_0_layer_norm_weight, y = hidden_states_19)[name = tensor("hidden_states_21")]; tensor states_9 = linear(bias = linear_0_bias_0, weight = encoder_block_1_layer_0_SelfAttention_q_weight, x = hidden_states_21)[name = tensor("linear_7")]; tensor var_231 = const()[name = tensor("op_231"), val = tensor([1, -1, 6, 64])]; tensor var_232 = reshape(shape = var_231, x = states_9)[name = tensor("op_232")]; tensor states_11 = linear(bias = linear_0_bias_0, weight = encoder_block_1_layer_0_SelfAttention_k_weight, x = hidden_states_21)[name = tensor("linear_8")]; tensor var_236 = const()[name = tensor("op_236"), val = tensor([1, -1, 6, 64])]; tensor var_237 = reshape(shape = var_236, x = states_11)[name = tensor("op_237")]; tensor states_13 = linear(bias = linear_0_bias_0, weight = encoder_block_1_layer_0_SelfAttention_v_weight, x = hidden_states_21)[name = tensor("linear_9")]; tensor var_241 = const()[name = tensor("op_241"), val = tensor([1, -1, 6, 64])]; tensor var_242 = reshape(shape = var_241, x = states_13)[name = tensor("op_242")]; tensor value_states_3_perm_0 = const()[name = tensor("value_states_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor scores_5_transpose_x_0 = const()[name = tensor("scores_5_transpose_x_0"), val = tensor(false)]; tensor scores_5_transpose_y_0 = const()[name = tensor("scores_5_transpose_y_0"), val = tensor(false)]; tensor transpose_38_perm_0 = const()[name = tensor("transpose_38_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_39_perm_0 = const()[name = tensor("transpose_39_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_39 = transpose(perm = transpose_39_perm_0, x = var_237)[name = tensor("transpose_101")]; tensor transpose_38 = transpose(perm = transpose_38_perm_0, x = var_232)[name = tensor("transpose_102")]; tensor scores_5 = matmul(transpose_x = scores_5_transpose_x_0, transpose_y = scores_5_transpose_y_0, x = transpose_38, y = transpose_39)[name = tensor("scores_5")]; tensor scores_7 = add(x = scores_5, y = position_bias)[name = tensor("scores_7")]; tensor var_248 = softmax(axis = var_21, x = scores_7)[name = tensor("op_248")]; tensor states_15_transpose_x_0 = const()[name = tensor("states_15_transpose_x_0"), val = tensor(false)]; tensor states_15_transpose_y_0 = const()[name = tensor("states_15_transpose_y_0"), val = tensor(false)]; tensor value_states_3 = transpose(perm = value_states_3_perm_0, x = var_242)[name = tensor("transpose_103")]; tensor states_15 = matmul(transpose_x = states_15_transpose_x_0, transpose_y = states_15_transpose_y_0, x = var_248, y = value_states_3)[name = tensor("states_15")]; tensor var_252_perm_0 = const()[name = tensor("op_252_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_254 = const()[name = tensor("op_254"), val = tensor([1, -1, 384])]; tensor var_252 = transpose(perm = var_252_perm_0, x = states_15)[name = tensor("transpose_100")]; tensor input_29 = reshape(shape = var_254, x = var_252)[name = tensor("input_29")]; tensor input_31 = linear(bias = linear_3_bias_0, weight = encoder_block_1_layer_0_SelfAttention_o_weight, x = input_29)[name = tensor("linear_10")]; tensor hidden_states_23 = add(x = hidden_states_15, y = input_31)[name = tensor("hidden_states_23")]; tensor var_17_promoted_3 = const()[name = tensor("op_17_promoted_3"), val = tensor(0x1p+1)]; tensor var_264 = pow(x = hidden_states_23, y = var_17_promoted_3)[name = tensor("op_264")]; tensor variance_7_axes_0 = const()[name = tensor("variance_7_axes_0"), val = tensor([-1])]; tensor variance_7_keep_dims_0 = const()[name = tensor("variance_7_keep_dims_0"), val = tensor(true)]; tensor variance_7 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = var_264)[name = tensor("variance_7")]; tensor var_267 = const()[name = tensor("op_267"), val = tensor(0x1.0c6f7ap-20)]; tensor var_268 = add(x = variance_7, y = var_267)[name = tensor("op_268")]; tensor var_269_epsilon_0 = const()[name = tensor("op_269_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_269 = rsqrt(epsilon = var_269_epsilon_0, x = var_268)[name = tensor("op_269")]; tensor hidden_states_27 = mul(x = hidden_states_23, y = var_269)[name = tensor("hidden_states_27")]; tensor input_33 = mul(x = encoder_block_1_layer_1_layer_norm_weight, y = hidden_states_27)[name = tensor("input_33")]; tensor input_35 = linear(bias = linear_4_bias_0, weight = encoder_block_1_layer_1_DenseReluDense_wi_0_weight, x = input_33)[name = tensor("linear_11")]; tensor hidden_gelu_3_mode_0 = const()[name = tensor("hidden_gelu_3_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor hidden_gelu_3 = gelu(mode = hidden_gelu_3_mode_0, x = input_35)[name = tensor("hidden_gelu_3")]; tensor hidden_linear_3 = linear(bias = linear_4_bias_0, weight = encoder_block_1_layer_1_DenseReluDense_wi_1_weight, x = input_33)[name = tensor("linear_12")]; tensor input_37 = mul(x = hidden_gelu_3, y = hidden_linear_3)[name = tensor("input_37")]; tensor input_41 = linear(bias = linear_3_bias_0, weight = encoder_block_1_layer_1_DenseReluDense_wo_weight, x = input_37)[name = tensor("linear_13")]; tensor hidden_states_29 = add(x = hidden_states_23, y = input_41)[name = tensor("hidden_states_29")]; tensor var_17_promoted_4 = const()[name = tensor("op_17_promoted_4"), val = tensor(0x1p+1)]; tensor var_308 = pow(x = hidden_states_29, y = var_17_promoted_4)[name = tensor("op_308")]; tensor variance_9_axes_0 = const()[name = tensor("variance_9_axes_0"), val = tensor([-1])]; tensor variance_9_keep_dims_0 = const()[name = tensor("variance_9_keep_dims_0"), val = tensor(true)]; tensor variance_9 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = var_308)[name = tensor("variance_9")]; tensor var_311 = const()[name = tensor("op_311"), val = tensor(0x1.0c6f7ap-20)]; tensor var_312 = add(x = variance_9, y = var_311)[name = tensor("op_312")]; tensor var_313_epsilon_0 = const()[name = tensor("op_313_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_313 = rsqrt(epsilon = var_313_epsilon_0, x = var_312)[name = tensor("op_313")]; tensor hidden_states_33 = mul(x = hidden_states_29, y = var_313)[name = tensor("hidden_states_33")]; tensor hidden_states_35 = mul(x = encoder_block_2_layer_0_layer_norm_weight, y = hidden_states_33)[name = tensor("hidden_states_35")]; tensor states_17 = linear(bias = linear_0_bias_0, weight = encoder_block_2_layer_0_SelfAttention_q_weight, x = hidden_states_35)[name = tensor("linear_14")]; tensor var_326 = const()[name = tensor("op_326"), val = tensor([1, -1, 6, 64])]; tensor var_327 = reshape(shape = var_326, x = states_17)[name = tensor("op_327")]; tensor states_19 = linear(bias = linear_0_bias_0, weight = encoder_block_2_layer_0_SelfAttention_k_weight, x = hidden_states_35)[name = tensor("linear_15")]; tensor var_331 = const()[name = tensor("op_331"), val = tensor([1, -1, 6, 64])]; tensor var_332 = reshape(shape = var_331, x = states_19)[name = tensor("op_332")]; tensor states_21 = linear(bias = linear_0_bias_0, weight = encoder_block_2_layer_0_SelfAttention_v_weight, x = hidden_states_35)[name = tensor("linear_16")]; tensor var_336 = const()[name = tensor("op_336"), val = tensor([1, -1, 6, 64])]; tensor var_337 = reshape(shape = var_336, x = states_21)[name = tensor("op_337")]; tensor value_states_5_perm_0 = const()[name = tensor("value_states_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor scores_9_transpose_x_0 = const()[name = tensor("scores_9_transpose_x_0"), val = tensor(false)]; tensor scores_9_transpose_y_0 = const()[name = tensor("scores_9_transpose_y_0"), val = tensor(false)]; tensor transpose_40_perm_0 = const()[name = tensor("transpose_40_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_41_perm_0 = const()[name = tensor("transpose_41_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_41 = transpose(perm = transpose_41_perm_0, x = var_332)[name = tensor("transpose_97")]; tensor transpose_40 = transpose(perm = transpose_40_perm_0, x = var_327)[name = tensor("transpose_98")]; tensor scores_9 = matmul(transpose_x = scores_9_transpose_x_0, transpose_y = scores_9_transpose_y_0, x = transpose_40, y = transpose_41)[name = tensor("scores_9")]; tensor scores_11 = add(x = scores_9, y = position_bias)[name = tensor("scores_11")]; tensor var_343 = softmax(axis = var_21, x = scores_11)[name = tensor("op_343")]; tensor states_23_transpose_x_0 = const()[name = tensor("states_23_transpose_x_0"), val = tensor(false)]; tensor states_23_transpose_y_0 = const()[name = tensor("states_23_transpose_y_0"), val = tensor(false)]; tensor value_states_5 = transpose(perm = value_states_5_perm_0, x = var_337)[name = tensor("transpose_99")]; tensor states_23 = matmul(transpose_x = states_23_transpose_x_0, transpose_y = states_23_transpose_y_0, x = var_343, y = value_states_5)[name = tensor("states_23")]; tensor var_347_perm_0 = const()[name = tensor("op_347_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_349 = const()[name = tensor("op_349"), val = tensor([1, -1, 384])]; tensor var_347 = transpose(perm = var_347_perm_0, x = states_23)[name = tensor("transpose_96")]; tensor input_47 = reshape(shape = var_349, x = var_347)[name = tensor("input_47")]; tensor input_49 = linear(bias = linear_3_bias_0, weight = encoder_block_2_layer_0_SelfAttention_o_weight, x = input_47)[name = tensor("linear_17")]; tensor hidden_states_37 = add(x = hidden_states_29, y = input_49)[name = tensor("hidden_states_37")]; tensor var_17_promoted_5 = const()[name = tensor("op_17_promoted_5"), val = tensor(0x1p+1)]; tensor var_359 = pow(x = hidden_states_37, y = var_17_promoted_5)[name = tensor("op_359")]; tensor variance_11_axes_0 = const()[name = tensor("variance_11_axes_0"), val = tensor([-1])]; tensor variance_11_keep_dims_0 = const()[name = tensor("variance_11_keep_dims_0"), val = tensor(true)]; tensor variance_11 = reduce_mean(axes = variance_11_axes_0, keep_dims = variance_11_keep_dims_0, x = var_359)[name = tensor("variance_11")]; tensor var_362 = const()[name = tensor("op_362"), val = tensor(0x1.0c6f7ap-20)]; tensor var_363 = add(x = variance_11, y = var_362)[name = tensor("op_363")]; tensor var_364_epsilon_0 = const()[name = tensor("op_364_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_364 = rsqrt(epsilon = var_364_epsilon_0, x = var_363)[name = tensor("op_364")]; tensor hidden_states_41 = mul(x = hidden_states_37, y = var_364)[name = tensor("hidden_states_41")]; tensor input_51 = mul(x = encoder_block_2_layer_1_layer_norm_weight, y = hidden_states_41)[name = tensor("input_51")]; tensor input_53 = linear(bias = linear_4_bias_0, weight = encoder_block_2_layer_1_DenseReluDense_wi_0_weight, x = input_51)[name = tensor("linear_18")]; tensor hidden_gelu_5_mode_0 = const()[name = tensor("hidden_gelu_5_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor hidden_gelu_5 = gelu(mode = hidden_gelu_5_mode_0, x = input_53)[name = tensor("hidden_gelu_5")]; tensor hidden_linear_5 = linear(bias = linear_4_bias_0, weight = encoder_block_2_layer_1_DenseReluDense_wi_1_weight, x = input_51)[name = tensor("linear_19")]; tensor input_55 = mul(x = hidden_gelu_5, y = hidden_linear_5)[name = tensor("input_55")]; tensor input_59 = linear(bias = linear_3_bias_0, weight = encoder_block_2_layer_1_DenseReluDense_wo_weight, x = input_55)[name = tensor("linear_20")]; tensor hidden_states_43 = add(x = hidden_states_37, y = input_59)[name = tensor("hidden_states_43")]; tensor var_17_promoted_6 = const()[name = tensor("op_17_promoted_6"), val = tensor(0x1p+1)]; tensor var_403 = pow(x = hidden_states_43, y = var_17_promoted_6)[name = tensor("op_403")]; tensor variance_13_axes_0 = const()[name = tensor("variance_13_axes_0"), val = tensor([-1])]; tensor variance_13_keep_dims_0 = const()[name = tensor("variance_13_keep_dims_0"), val = tensor(true)]; tensor variance_13 = reduce_mean(axes = variance_13_axes_0, keep_dims = variance_13_keep_dims_0, x = var_403)[name = tensor("variance_13")]; tensor var_406 = const()[name = tensor("op_406"), val = tensor(0x1.0c6f7ap-20)]; tensor var_407 = add(x = variance_13, y = var_406)[name = tensor("op_407")]; tensor var_408_epsilon_0 = const()[name = tensor("op_408_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_408 = rsqrt(epsilon = var_408_epsilon_0, x = var_407)[name = tensor("op_408")]; tensor hidden_states_47 = mul(x = hidden_states_43, y = var_408)[name = tensor("hidden_states_47")]; tensor hidden_states_49 = mul(x = encoder_block_3_layer_0_layer_norm_weight, y = hidden_states_47)[name = tensor("hidden_states_49")]; tensor states_25 = linear(bias = linear_0_bias_0, weight = encoder_block_3_layer_0_SelfAttention_q_weight, x = hidden_states_49)[name = tensor("linear_21")]; tensor var_421 = const()[name = tensor("op_421"), val = tensor([1, -1, 6, 64])]; tensor var_422 = reshape(shape = var_421, x = states_25)[name = tensor("op_422")]; tensor states_27 = linear(bias = linear_0_bias_0, weight = encoder_block_3_layer_0_SelfAttention_k_weight, x = hidden_states_49)[name = tensor("linear_22")]; tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, -1, 6, 64])]; tensor var_427 = reshape(shape = var_426, x = states_27)[name = tensor("op_427")]; tensor states_29 = linear(bias = linear_0_bias_0, weight = encoder_block_3_layer_0_SelfAttention_v_weight, x = hidden_states_49)[name = tensor("linear_23")]; tensor var_431 = const()[name = tensor("op_431"), val = tensor([1, -1, 6, 64])]; tensor var_432 = reshape(shape = var_431, x = states_29)[name = tensor("op_432")]; tensor value_states_7_perm_0 = const()[name = tensor("value_states_7_perm_0"), val = tensor([0, 2, 1, 3])]; tensor scores_13_transpose_x_0 = const()[name = tensor("scores_13_transpose_x_0"), val = tensor(false)]; tensor scores_13_transpose_y_0 = const()[name = tensor("scores_13_transpose_y_0"), val = tensor(false)]; tensor transpose_42_perm_0 = const()[name = tensor("transpose_42_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_43_perm_0 = const()[name = tensor("transpose_43_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_43 = transpose(perm = transpose_43_perm_0, x = var_427)[name = tensor("transpose_93")]; tensor transpose_42 = transpose(perm = transpose_42_perm_0, x = var_422)[name = tensor("transpose_94")]; tensor scores_13 = matmul(transpose_x = scores_13_transpose_x_0, transpose_y = scores_13_transpose_y_0, x = transpose_42, y = transpose_43)[name = tensor("scores_13")]; tensor scores_15 = add(x = scores_13, y = position_bias)[name = tensor("scores_15")]; tensor var_438 = softmax(axis = var_21, x = scores_15)[name = tensor("op_438")]; tensor states_31_transpose_x_0 = const()[name = tensor("states_31_transpose_x_0"), val = tensor(false)]; tensor states_31_transpose_y_0 = const()[name = tensor("states_31_transpose_y_0"), val = tensor(false)]; tensor value_states_7 = transpose(perm = value_states_7_perm_0, x = var_432)[name = tensor("transpose_95")]; tensor states_31 = matmul(transpose_x = states_31_transpose_x_0, transpose_y = states_31_transpose_y_0, x = var_438, y = value_states_7)[name = tensor("states_31")]; tensor var_442_perm_0 = const()[name = tensor("op_442_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_444 = const()[name = tensor("op_444"), val = tensor([1, -1, 384])]; tensor var_442 = transpose(perm = var_442_perm_0, x = states_31)[name = tensor("transpose_92")]; tensor input_65 = reshape(shape = var_444, x = var_442)[name = tensor("input_65")]; tensor input_67 = linear(bias = linear_3_bias_0, weight = encoder_block_3_layer_0_SelfAttention_o_weight, x = input_65)[name = tensor("linear_24")]; tensor hidden_states_51 = add(x = hidden_states_43, y = input_67)[name = tensor("hidden_states_51")]; tensor var_17_promoted_7 = const()[name = tensor("op_17_promoted_7"), val = tensor(0x1p+1)]; tensor var_454 = pow(x = hidden_states_51, y = var_17_promoted_7)[name = tensor("op_454")]; tensor variance_15_axes_0 = const()[name = tensor("variance_15_axes_0"), val = tensor([-1])]; tensor variance_15_keep_dims_0 = const()[name = tensor("variance_15_keep_dims_0"), val = tensor(true)]; tensor variance_15 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = var_454)[name = tensor("variance_15")]; tensor var_457 = const()[name = tensor("op_457"), val = tensor(0x1.0c6f7ap-20)]; tensor var_458 = add(x = variance_15, y = var_457)[name = tensor("op_458")]; tensor var_459_epsilon_0 = const()[name = tensor("op_459_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_459 = rsqrt(epsilon = var_459_epsilon_0, x = var_458)[name = tensor("op_459")]; tensor hidden_states_55 = mul(x = hidden_states_51, y = var_459)[name = tensor("hidden_states_55")]; tensor input_69 = mul(x = encoder_block_3_layer_1_layer_norm_weight, y = hidden_states_55)[name = tensor("input_69")]; tensor input_71 = linear(bias = linear_4_bias_0, weight = encoder_block_3_layer_1_DenseReluDense_wi_0_weight, x = input_69)[name = tensor("linear_25")]; tensor hidden_gelu_7_mode_0 = const()[name = tensor("hidden_gelu_7_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor hidden_gelu_7 = gelu(mode = hidden_gelu_7_mode_0, x = input_71)[name = tensor("hidden_gelu_7")]; tensor hidden_linear_7 = linear(bias = linear_4_bias_0, weight = encoder_block_3_layer_1_DenseReluDense_wi_1_weight, x = input_69)[name = tensor("linear_26")]; tensor input_73 = mul(x = hidden_gelu_7, y = hidden_linear_7)[name = tensor("input_73")]; tensor input_77 = linear(bias = linear_3_bias_0, weight = encoder_block_3_layer_1_DenseReluDense_wo_weight, x = input_73)[name = tensor("linear_27")]; tensor hidden_states_57 = add(x = hidden_states_51, y = input_77)[name = tensor("hidden_states_57")]; tensor var_17_promoted_8 = const()[name = tensor("op_17_promoted_8"), val = tensor(0x1p+1)]; tensor var_498 = pow(x = hidden_states_57, y = var_17_promoted_8)[name = tensor("op_498")]; tensor variance_17_axes_0 = const()[name = tensor("variance_17_axes_0"), val = tensor([-1])]; tensor variance_17_keep_dims_0 = const()[name = tensor("variance_17_keep_dims_0"), val = tensor(true)]; tensor variance_17 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = var_498)[name = tensor("variance_17")]; tensor var_501 = const()[name = tensor("op_501"), val = tensor(0x1.0c6f7ap-20)]; tensor var_502 = add(x = variance_17, y = var_501)[name = tensor("op_502")]; tensor var_503_epsilon_0 = const()[name = tensor("op_503_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_503 = rsqrt(epsilon = var_503_epsilon_0, x = var_502)[name = tensor("op_503")]; tensor hidden_states_61 = mul(x = hidden_states_57, y = var_503)[name = tensor("hidden_states_61")]; tensor hidden_states_63 = mul(x = encoder_block_4_layer_0_layer_norm_weight, y = hidden_states_61)[name = tensor("hidden_states_63")]; tensor states_33 = linear(bias = linear_0_bias_0, weight = encoder_block_4_layer_0_SelfAttention_q_weight, x = hidden_states_63)[name = tensor("linear_28")]; tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, -1, 6, 64])]; tensor var_517 = reshape(shape = var_516, x = states_33)[name = tensor("op_517")]; tensor states_35 = linear(bias = linear_0_bias_0, weight = encoder_block_4_layer_0_SelfAttention_k_weight, x = hidden_states_63)[name = tensor("linear_29")]; tensor var_521 = const()[name = tensor("op_521"), val = tensor([1, -1, 6, 64])]; tensor var_522 = reshape(shape = var_521, x = states_35)[name = tensor("op_522")]; tensor states_37 = linear(bias = linear_0_bias_0, weight = encoder_block_4_layer_0_SelfAttention_v_weight, x = hidden_states_63)[name = tensor("linear_30")]; tensor var_526 = const()[name = tensor("op_526"), val = tensor([1, -1, 6, 64])]; tensor var_527 = reshape(shape = var_526, x = states_37)[name = tensor("op_527")]; tensor value_states_9_perm_0 = const()[name = tensor("value_states_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor scores_17_transpose_x_0 = const()[name = tensor("scores_17_transpose_x_0"), val = tensor(false)]; tensor scores_17_transpose_y_0 = const()[name = tensor("scores_17_transpose_y_0"), val = tensor(false)]; tensor transpose_44_perm_0 = const()[name = tensor("transpose_44_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_45_perm_0 = const()[name = tensor("transpose_45_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_45 = transpose(perm = transpose_45_perm_0, x = var_522)[name = tensor("transpose_89")]; tensor transpose_44 = transpose(perm = transpose_44_perm_0, x = var_517)[name = tensor("transpose_90")]; tensor scores_17 = matmul(transpose_x = scores_17_transpose_x_0, transpose_y = scores_17_transpose_y_0, x = transpose_44, y = transpose_45)[name = tensor("scores_17")]; tensor scores_19 = add(x = scores_17, y = position_bias)[name = tensor("scores_19")]; tensor var_533 = softmax(axis = var_21, x = scores_19)[name = tensor("op_533")]; tensor states_39_transpose_x_0 = const()[name = tensor("states_39_transpose_x_0"), val = tensor(false)]; tensor states_39_transpose_y_0 = const()[name = tensor("states_39_transpose_y_0"), val = tensor(false)]; tensor value_states_9 = transpose(perm = value_states_9_perm_0, x = var_527)[name = tensor("transpose_91")]; tensor states_39 = matmul(transpose_x = states_39_transpose_x_0, transpose_y = states_39_transpose_y_0, x = var_533, y = value_states_9)[name = tensor("states_39")]; tensor var_537_perm_0 = const()[name = tensor("op_537_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_539 = const()[name = tensor("op_539"), val = tensor([1, -1, 384])]; tensor var_537 = transpose(perm = var_537_perm_0, x = states_39)[name = tensor("transpose_88")]; tensor input_83 = reshape(shape = var_539, x = var_537)[name = tensor("input_83")]; tensor input_85 = linear(bias = linear_3_bias_0, weight = encoder_block_4_layer_0_SelfAttention_o_weight, x = input_83)[name = tensor("linear_31")]; tensor hidden_states_65 = add(x = hidden_states_57, y = input_85)[name = tensor("hidden_states_65")]; tensor var_17_promoted_9 = const()[name = tensor("op_17_promoted_9"), val = tensor(0x1p+1)]; tensor var_549 = pow(x = hidden_states_65, y = var_17_promoted_9)[name = tensor("op_549")]; tensor variance_19_axes_0 = const()[name = tensor("variance_19_axes_0"), val = tensor([-1])]; tensor variance_19_keep_dims_0 = const()[name = tensor("variance_19_keep_dims_0"), val = tensor(true)]; tensor variance_19 = reduce_mean(axes = variance_19_axes_0, keep_dims = variance_19_keep_dims_0, x = var_549)[name = tensor("variance_19")]; tensor var_552 = const()[name = tensor("op_552"), val = tensor(0x1.0c6f7ap-20)]; tensor var_553 = add(x = variance_19, y = var_552)[name = tensor("op_553")]; tensor var_554_epsilon_0 = const()[name = tensor("op_554_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_554 = rsqrt(epsilon = var_554_epsilon_0, x = var_553)[name = tensor("op_554")]; tensor hidden_states_69 = mul(x = hidden_states_65, y = var_554)[name = tensor("hidden_states_69")]; tensor input_87 = mul(x = encoder_block_4_layer_1_layer_norm_weight, y = hidden_states_69)[name = tensor("input_87")]; tensor input_89 = linear(bias = linear_4_bias_0, weight = encoder_block_4_layer_1_DenseReluDense_wi_0_weight, x = input_87)[name = tensor("linear_32")]; tensor hidden_gelu_9_mode_0 = const()[name = tensor("hidden_gelu_9_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor hidden_gelu_9 = gelu(mode = hidden_gelu_9_mode_0, x = input_89)[name = tensor("hidden_gelu_9")]; tensor hidden_linear_9 = linear(bias = linear_4_bias_0, weight = encoder_block_4_layer_1_DenseReluDense_wi_1_weight, x = input_87)[name = tensor("linear_33")]; tensor input_91 = mul(x = hidden_gelu_9, y = hidden_linear_9)[name = tensor("input_91")]; tensor input_95 = linear(bias = linear_3_bias_0, weight = encoder_block_4_layer_1_DenseReluDense_wo_weight, x = input_91)[name = tensor("linear_34")]; tensor hidden_states_71 = add(x = hidden_states_65, y = input_95)[name = tensor("hidden_states_71")]; tensor var_17_promoted_10 = const()[name = tensor("op_17_promoted_10"), val = tensor(0x1p+1)]; tensor var_593 = pow(x = hidden_states_71, y = var_17_promoted_10)[name = tensor("op_593")]; tensor variance_21_axes_0 = const()[name = tensor("variance_21_axes_0"), val = tensor([-1])]; tensor variance_21_keep_dims_0 = const()[name = tensor("variance_21_keep_dims_0"), val = tensor(true)]; tensor variance_21 = reduce_mean(axes = variance_21_axes_0, keep_dims = variance_21_keep_dims_0, x = var_593)[name = tensor("variance_21")]; tensor var_596 = const()[name = tensor("op_596"), val = tensor(0x1.0c6f7ap-20)]; tensor var_597 = add(x = variance_21, y = var_596)[name = tensor("op_597")]; tensor var_598_epsilon_0 = const()[name = tensor("op_598_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_598 = rsqrt(epsilon = var_598_epsilon_0, x = var_597)[name = tensor("op_598")]; tensor hidden_states_75 = mul(x = hidden_states_71, y = var_598)[name = tensor("hidden_states_75")]; tensor hidden_states_77 = mul(x = encoder_block_5_layer_0_layer_norm_weight, y = hidden_states_75)[name = tensor("hidden_states_77")]; tensor states_41 = linear(bias = linear_0_bias_0, weight = encoder_block_5_layer_0_SelfAttention_q_weight, x = hidden_states_77)[name = tensor("linear_35")]; tensor var_611 = const()[name = tensor("op_611"), val = tensor([1, -1, 6, 64])]; tensor var_612 = reshape(shape = var_611, x = states_41)[name = tensor("op_612")]; tensor states_43 = linear(bias = linear_0_bias_0, weight = encoder_block_5_layer_0_SelfAttention_k_weight, x = hidden_states_77)[name = tensor("linear_36")]; tensor var_616 = const()[name = tensor("op_616"), val = tensor([1, -1, 6, 64])]; tensor var_617 = reshape(shape = var_616, x = states_43)[name = tensor("op_617")]; tensor states_45 = linear(bias = linear_0_bias_0, weight = encoder_block_5_layer_0_SelfAttention_v_weight, x = hidden_states_77)[name = tensor("linear_37")]; tensor var_621 = const()[name = tensor("op_621"), val = tensor([1, -1, 6, 64])]; tensor var_622 = reshape(shape = var_621, x = states_45)[name = tensor("op_622")]; tensor value_states_11_perm_0 = const()[name = tensor("value_states_11_perm_0"), val = tensor([0, 2, 1, 3])]; tensor scores_21_transpose_x_0 = const()[name = tensor("scores_21_transpose_x_0"), val = tensor(false)]; tensor scores_21_transpose_y_0 = const()[name = tensor("scores_21_transpose_y_0"), val = tensor(false)]; tensor transpose_46_perm_0 = const()[name = tensor("transpose_46_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_47_perm_0 = const()[name = tensor("transpose_47_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_47 = transpose(perm = transpose_47_perm_0, x = var_617)[name = tensor("transpose_85")]; tensor transpose_46 = transpose(perm = transpose_46_perm_0, x = var_612)[name = tensor("transpose_86")]; tensor scores_21 = matmul(transpose_x = scores_21_transpose_x_0, transpose_y = scores_21_transpose_y_0, x = transpose_46, y = transpose_47)[name = tensor("scores_21")]; tensor scores_23 = add(x = scores_21, y = position_bias)[name = tensor("scores_23")]; tensor var_628 = softmax(axis = var_21, x = scores_23)[name = tensor("op_628")]; tensor states_47_transpose_x_0 = const()[name = tensor("states_47_transpose_x_0"), val = tensor(false)]; tensor states_47_transpose_y_0 = const()[name = tensor("states_47_transpose_y_0"), val = tensor(false)]; tensor value_states_11 = transpose(perm = value_states_11_perm_0, x = var_622)[name = tensor("transpose_87")]; tensor states_47 = matmul(transpose_x = states_47_transpose_x_0, transpose_y = states_47_transpose_y_0, x = var_628, y = value_states_11)[name = tensor("states_47")]; tensor var_632_perm_0 = const()[name = tensor("op_632_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, -1, 384])]; tensor var_632 = transpose(perm = var_632_perm_0, x = states_47)[name = tensor("transpose_84")]; tensor input_101 = reshape(shape = var_634, x = var_632)[name = tensor("input_101")]; tensor input_103 = linear(bias = linear_3_bias_0, weight = encoder_block_5_layer_0_SelfAttention_o_weight, x = input_101)[name = tensor("linear_38")]; tensor hidden_states_79 = add(x = hidden_states_71, y = input_103)[name = tensor("hidden_states_79")]; tensor var_17_promoted_11 = const()[name = tensor("op_17_promoted_11"), val = tensor(0x1p+1)]; tensor var_644 = pow(x = hidden_states_79, y = var_17_promoted_11)[name = tensor("op_644")]; tensor variance_23_axes_0 = const()[name = tensor("variance_23_axes_0"), val = tensor([-1])]; tensor variance_23_keep_dims_0 = const()[name = tensor("variance_23_keep_dims_0"), val = tensor(true)]; tensor variance_23 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = var_644)[name = tensor("variance_23")]; tensor var_647 = const()[name = tensor("op_647"), val = tensor(0x1.0c6f7ap-20)]; tensor var_648 = add(x = variance_23, y = var_647)[name = tensor("op_648")]; tensor var_649_epsilon_0 = const()[name = tensor("op_649_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_649 = rsqrt(epsilon = var_649_epsilon_0, x = var_648)[name = tensor("op_649")]; tensor hidden_states_83 = mul(x = hidden_states_79, y = var_649)[name = tensor("hidden_states_83")]; tensor input_105 = mul(x = encoder_block_5_layer_1_layer_norm_weight, y = hidden_states_83)[name = tensor("input_105")]; tensor input_107 = linear(bias = linear_4_bias_0, weight = encoder_block_5_layer_1_DenseReluDense_wi_0_weight, x = input_105)[name = tensor("linear_39")]; tensor hidden_gelu_11_mode_0 = const()[name = tensor("hidden_gelu_11_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor hidden_gelu_11 = gelu(mode = hidden_gelu_11_mode_0, x = input_107)[name = tensor("hidden_gelu_11")]; tensor hidden_linear_11 = linear(bias = linear_4_bias_0, weight = encoder_block_5_layer_1_DenseReluDense_wi_1_weight, x = input_105)[name = tensor("linear_40")]; tensor input_109 = mul(x = hidden_gelu_11, y = hidden_linear_11)[name = tensor("input_109")]; tensor input_113 = linear(bias = linear_3_bias_0, weight = encoder_block_5_layer_1_DenseReluDense_wo_weight, x = input_109)[name = tensor("linear_41")]; tensor hidden_states_85 = add(x = hidden_states_79, y = input_113)[name = tensor("hidden_states_85")]; tensor var_17_promoted_12 = const()[name = tensor("op_17_promoted_12"), val = tensor(0x1p+1)]; tensor var_688 = pow(x = hidden_states_85, y = var_17_promoted_12)[name = tensor("op_688")]; tensor variance_25_axes_0 = const()[name = tensor("variance_25_axes_0"), val = tensor([-1])]; tensor variance_25_keep_dims_0 = const()[name = tensor("variance_25_keep_dims_0"), val = tensor(true)]; tensor variance_25 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = var_688)[name = tensor("variance_25")]; tensor var_691 = const()[name = tensor("op_691"), val = tensor(0x1.0c6f7ap-20)]; tensor var_692 = add(x = variance_25, y = var_691)[name = tensor("op_692")]; tensor var_693_epsilon_0 = const()[name = tensor("op_693_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_693 = rsqrt(epsilon = var_693_epsilon_0, x = var_692)[name = tensor("op_693")]; tensor hidden_states_89 = mul(x = hidden_states_85, y = var_693)[name = tensor("hidden_states_89")]; tensor hidden_states_91 = mul(x = encoder_block_6_layer_0_layer_norm_weight, y = hidden_states_89)[name = tensor("hidden_states_91")]; tensor states_49 = linear(bias = linear_0_bias_0, weight = encoder_block_6_layer_0_SelfAttention_q_weight, x = hidden_states_91)[name = tensor("linear_42")]; tensor var_706 = const()[name = tensor("op_706"), val = tensor([1, -1, 6, 64])]; tensor var_707 = reshape(shape = var_706, x = states_49)[name = tensor("op_707")]; tensor states_51 = linear(bias = linear_0_bias_0, weight = encoder_block_6_layer_0_SelfAttention_k_weight, x = hidden_states_91)[name = tensor("linear_43")]; tensor var_711 = const()[name = tensor("op_711"), val = tensor([1, -1, 6, 64])]; tensor var_712 = reshape(shape = var_711, x = states_51)[name = tensor("op_712")]; tensor states_53 = linear(bias = linear_0_bias_0, weight = encoder_block_6_layer_0_SelfAttention_v_weight, x = hidden_states_91)[name = tensor("linear_44")]; tensor var_716 = const()[name = tensor("op_716"), val = tensor([1, -1, 6, 64])]; tensor var_717 = reshape(shape = var_716, x = states_53)[name = tensor("op_717")]; tensor value_states_13_perm_0 = const()[name = tensor("value_states_13_perm_0"), val = tensor([0, 2, 1, 3])]; tensor scores_25_transpose_x_0 = const()[name = tensor("scores_25_transpose_x_0"), val = tensor(false)]; tensor scores_25_transpose_y_0 = const()[name = tensor("scores_25_transpose_y_0"), val = tensor(false)]; tensor transpose_48_perm_0 = const()[name = tensor("transpose_48_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_49_perm_0 = const()[name = tensor("transpose_49_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_49 = transpose(perm = transpose_49_perm_0, x = var_712)[name = tensor("transpose_81")]; tensor transpose_48 = transpose(perm = transpose_48_perm_0, x = var_707)[name = tensor("transpose_82")]; tensor scores_25 = matmul(transpose_x = scores_25_transpose_x_0, transpose_y = scores_25_transpose_y_0, x = transpose_48, y = transpose_49)[name = tensor("scores_25")]; tensor scores_27 = add(x = scores_25, y = position_bias)[name = tensor("scores_27")]; tensor var_723 = softmax(axis = var_21, x = scores_27)[name = tensor("op_723")]; tensor states_55_transpose_x_0 = const()[name = tensor("states_55_transpose_x_0"), val = tensor(false)]; tensor states_55_transpose_y_0 = const()[name = tensor("states_55_transpose_y_0"), val = tensor(false)]; tensor value_states_13 = transpose(perm = value_states_13_perm_0, x = var_717)[name = tensor("transpose_83")]; tensor states_55 = matmul(transpose_x = states_55_transpose_x_0, transpose_y = states_55_transpose_y_0, x = var_723, y = value_states_13)[name = tensor("states_55")]; tensor var_727_perm_0 = const()[name = tensor("op_727_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_729 = const()[name = tensor("op_729"), val = tensor([1, -1, 384])]; tensor var_727 = transpose(perm = var_727_perm_0, x = states_55)[name = tensor("transpose_80")]; tensor input_119 = reshape(shape = var_729, x = var_727)[name = tensor("input_119")]; tensor input_121 = linear(bias = linear_3_bias_0, weight = encoder_block_6_layer_0_SelfAttention_o_weight, x = input_119)[name = tensor("linear_45")]; tensor hidden_states_93 = add(x = hidden_states_85, y = input_121)[name = tensor("hidden_states_93")]; tensor var_17_promoted_13 = const()[name = tensor("op_17_promoted_13"), val = tensor(0x1p+1)]; tensor var_739 = pow(x = hidden_states_93, y = var_17_promoted_13)[name = tensor("op_739")]; tensor variance_27_axes_0 = const()[name = tensor("variance_27_axes_0"), val = tensor([-1])]; tensor variance_27_keep_dims_0 = const()[name = tensor("variance_27_keep_dims_0"), val = tensor(true)]; tensor variance_27 = reduce_mean(axes = variance_27_axes_0, keep_dims = variance_27_keep_dims_0, x = var_739)[name = tensor("variance_27")]; tensor var_742 = const()[name = tensor("op_742"), val = tensor(0x1.0c6f7ap-20)]; tensor var_743 = add(x = variance_27, y = var_742)[name = tensor("op_743")]; tensor var_744_epsilon_0 = const()[name = tensor("op_744_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_744 = rsqrt(epsilon = var_744_epsilon_0, x = var_743)[name = tensor("op_744")]; tensor hidden_states_97 = mul(x = hidden_states_93, y = var_744)[name = tensor("hidden_states_97")]; tensor input_123 = mul(x = encoder_block_6_layer_1_layer_norm_weight, y = hidden_states_97)[name = tensor("input_123")]; tensor input_125 = linear(bias = linear_4_bias_0, weight = encoder_block_6_layer_1_DenseReluDense_wi_0_weight, x = input_123)[name = tensor("linear_46")]; tensor hidden_gelu_13_mode_0 = const()[name = tensor("hidden_gelu_13_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor hidden_gelu_13 = gelu(mode = hidden_gelu_13_mode_0, x = input_125)[name = tensor("hidden_gelu_13")]; tensor hidden_linear_13 = linear(bias = linear_4_bias_0, weight = encoder_block_6_layer_1_DenseReluDense_wi_1_weight, x = input_123)[name = tensor("linear_47")]; tensor input_127 = mul(x = hidden_gelu_13, y = hidden_linear_13)[name = tensor("input_127")]; tensor input_131 = linear(bias = linear_3_bias_0, weight = encoder_block_6_layer_1_DenseReluDense_wo_weight, x = input_127)[name = tensor("linear_48")]; tensor hidden_states_99 = add(x = hidden_states_93, y = input_131)[name = tensor("hidden_states_99")]; tensor var_17_promoted_14 = const()[name = tensor("op_17_promoted_14"), val = tensor(0x1p+1)]; tensor var_783 = pow(x = hidden_states_99, y = var_17_promoted_14)[name = tensor("op_783")]; tensor variance_29_axes_0 = const()[name = tensor("variance_29_axes_0"), val = tensor([-1])]; tensor variance_29_keep_dims_0 = const()[name = tensor("variance_29_keep_dims_0"), val = tensor(true)]; tensor variance_29 = reduce_mean(axes = variance_29_axes_0, keep_dims = variance_29_keep_dims_0, x = var_783)[name = tensor("variance_29")]; tensor var_786 = const()[name = tensor("op_786"), val = tensor(0x1.0c6f7ap-20)]; tensor var_787 = add(x = variance_29, y = var_786)[name = tensor("op_787")]; tensor var_788_epsilon_0 = const()[name = tensor("op_788_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_788 = rsqrt(epsilon = var_788_epsilon_0, x = var_787)[name = tensor("op_788")]; tensor hidden_states_103 = mul(x = hidden_states_99, y = var_788)[name = tensor("hidden_states_103")]; tensor hidden_states_105 = mul(x = encoder_block_7_layer_0_layer_norm_weight, y = hidden_states_103)[name = tensor("hidden_states_105")]; tensor states_57 = linear(bias = linear_0_bias_0, weight = encoder_block_7_layer_0_SelfAttention_q_weight, x = hidden_states_105)[name = tensor("linear_49")]; tensor var_801 = const()[name = tensor("op_801"), val = tensor([1, -1, 6, 64])]; tensor var_802 = reshape(shape = var_801, x = states_57)[name = tensor("op_802")]; tensor states_59 = linear(bias = linear_0_bias_0, weight = encoder_block_7_layer_0_SelfAttention_k_weight, x = hidden_states_105)[name = tensor("linear_50")]; tensor var_806 = const()[name = tensor("op_806"), val = tensor([1, -1, 6, 64])]; tensor var_807 = reshape(shape = var_806, x = states_59)[name = tensor("op_807")]; tensor states_61 = linear(bias = linear_0_bias_0, weight = encoder_block_7_layer_0_SelfAttention_v_weight, x = hidden_states_105)[name = tensor("linear_51")]; tensor var_811 = const()[name = tensor("op_811"), val = tensor([1, -1, 6, 64])]; tensor var_812 = reshape(shape = var_811, x = states_61)[name = tensor("op_812")]; tensor value_states_15_perm_0 = const()[name = tensor("value_states_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor scores_29_transpose_x_0 = const()[name = tensor("scores_29_transpose_x_0"), val = tensor(false)]; tensor scores_29_transpose_y_0 = const()[name = tensor("scores_29_transpose_y_0"), val = tensor(false)]; tensor transpose_50_perm_0 = const()[name = tensor("transpose_50_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_51_perm_0 = const()[name = tensor("transpose_51_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_51 = transpose(perm = transpose_51_perm_0, x = var_807)[name = tensor("transpose_77")]; tensor transpose_50 = transpose(perm = transpose_50_perm_0, x = var_802)[name = tensor("transpose_78")]; tensor scores_29 = matmul(transpose_x = scores_29_transpose_x_0, transpose_y = scores_29_transpose_y_0, x = transpose_50, y = transpose_51)[name = tensor("scores_29")]; tensor scores_31 = add(x = scores_29, y = position_bias)[name = tensor("scores_31")]; tensor var_818 = softmax(axis = var_21, x = scores_31)[name = tensor("op_818")]; tensor states_63_transpose_x_0 = const()[name = tensor("states_63_transpose_x_0"), val = tensor(false)]; tensor states_63_transpose_y_0 = const()[name = tensor("states_63_transpose_y_0"), val = tensor(false)]; tensor value_states_15 = transpose(perm = value_states_15_perm_0, x = var_812)[name = tensor("transpose_79")]; tensor states_63 = matmul(transpose_x = states_63_transpose_x_0, transpose_y = states_63_transpose_y_0, x = var_818, y = value_states_15)[name = tensor("states_63")]; tensor var_822_perm_0 = const()[name = tensor("op_822_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_824 = const()[name = tensor("op_824"), val = tensor([1, -1, 384])]; tensor var_822 = transpose(perm = var_822_perm_0, x = states_63)[name = tensor("transpose_76")]; tensor input_137 = reshape(shape = var_824, x = var_822)[name = tensor("input_137")]; tensor input_139 = linear(bias = linear_3_bias_0, weight = encoder_block_7_layer_0_SelfAttention_o_weight, x = input_137)[name = tensor("linear_52")]; tensor hidden_states_107 = add(x = hidden_states_99, y = input_139)[name = tensor("hidden_states_107")]; tensor var_17_promoted_15 = const()[name = tensor("op_17_promoted_15"), val = tensor(0x1p+1)]; tensor var_834 = pow(x = hidden_states_107, y = var_17_promoted_15)[name = tensor("op_834")]; tensor variance_31_axes_0 = const()[name = tensor("variance_31_axes_0"), val = tensor([-1])]; tensor variance_31_keep_dims_0 = const()[name = tensor("variance_31_keep_dims_0"), val = tensor(true)]; tensor variance_31 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = var_834)[name = tensor("variance_31")]; tensor var_837 = const()[name = tensor("op_837"), val = tensor(0x1.0c6f7ap-20)]; tensor var_838 = add(x = variance_31, y = var_837)[name = tensor("op_838")]; tensor var_839_epsilon_0 = const()[name = tensor("op_839_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_839 = rsqrt(epsilon = var_839_epsilon_0, x = var_838)[name = tensor("op_839")]; tensor hidden_states_111 = mul(x = hidden_states_107, y = var_839)[name = tensor("hidden_states_111")]; tensor input_141 = mul(x = encoder_block_7_layer_1_layer_norm_weight, y = hidden_states_111)[name = tensor("input_141")]; tensor input_143 = linear(bias = linear_4_bias_0, weight = encoder_block_7_layer_1_DenseReluDense_wi_0_weight, x = input_141)[name = tensor("linear_53")]; tensor hidden_gelu_15_mode_0 = const()[name = tensor("hidden_gelu_15_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor hidden_gelu_15 = gelu(mode = hidden_gelu_15_mode_0, x = input_143)[name = tensor("hidden_gelu_15")]; tensor hidden_linear_15 = linear(bias = linear_4_bias_0, weight = encoder_block_7_layer_1_DenseReluDense_wi_1_weight, x = input_141)[name = tensor("linear_54")]; tensor input_145 = mul(x = hidden_gelu_15, y = hidden_linear_15)[name = tensor("input_145")]; tensor input_149 = linear(bias = linear_3_bias_0, weight = encoder_block_7_layer_1_DenseReluDense_wo_weight, x = input_145)[name = tensor("linear_55")]; tensor hidden_states_113 = add(x = hidden_states_107, y = input_149)[name = tensor("hidden_states_113")]; tensor var_17_promoted_16 = const()[name = tensor("op_17_promoted_16"), val = tensor(0x1p+1)]; tensor var_878 = pow(x = hidden_states_113, y = var_17_promoted_16)[name = tensor("op_878")]; tensor variance_33_axes_0 = const()[name = tensor("variance_33_axes_0"), val = tensor([-1])]; tensor variance_33_keep_dims_0 = const()[name = tensor("variance_33_keep_dims_0"), val = tensor(true)]; tensor variance_33 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = var_878)[name = tensor("variance_33")]; tensor var_881 = const()[name = tensor("op_881"), val = tensor(0x1.0c6f7ap-20)]; tensor var_882 = add(x = variance_33, y = var_881)[name = tensor("op_882")]; tensor var_883_epsilon_0 = const()[name = tensor("op_883_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_883 = rsqrt(epsilon = var_883_epsilon_0, x = var_882)[name = tensor("op_883")]; tensor hidden_states_117 = mul(x = hidden_states_113, y = var_883)[name = tensor("hidden_states_117")]; tensor hidden_states_119 = mul(x = encoder_block_8_layer_0_layer_norm_weight, y = hidden_states_117)[name = tensor("hidden_states_119")]; tensor states_65 = linear(bias = linear_0_bias_0, weight = encoder_block_8_layer_0_SelfAttention_q_weight, x = hidden_states_119)[name = tensor("linear_56")]; tensor var_896 = const()[name = tensor("op_896"), val = tensor([1, -1, 6, 64])]; tensor var_897 = reshape(shape = var_896, x = states_65)[name = tensor("op_897")]; tensor states_67 = linear(bias = linear_0_bias_0, weight = encoder_block_8_layer_0_SelfAttention_k_weight, x = hidden_states_119)[name = tensor("linear_57")]; tensor var_901 = const()[name = tensor("op_901"), val = tensor([1, -1, 6, 64])]; tensor var_902 = reshape(shape = var_901, x = states_67)[name = tensor("op_902")]; tensor states_69 = linear(bias = linear_0_bias_0, weight = encoder_block_8_layer_0_SelfAttention_v_weight, x = hidden_states_119)[name = tensor("linear_58")]; tensor var_906 = const()[name = tensor("op_906"), val = tensor([1, -1, 6, 64])]; tensor var_907 = reshape(shape = var_906, x = states_69)[name = tensor("op_907")]; tensor value_states_17_perm_0 = const()[name = tensor("value_states_17_perm_0"), val = tensor([0, 2, 1, 3])]; tensor scores_33_transpose_x_0 = const()[name = tensor("scores_33_transpose_x_0"), val = tensor(false)]; tensor scores_33_transpose_y_0 = const()[name = tensor("scores_33_transpose_y_0"), val = tensor(false)]; tensor transpose_52_perm_0 = const()[name = tensor("transpose_52_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_53_perm_0 = const()[name = tensor("transpose_53_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_53 = transpose(perm = transpose_53_perm_0, x = var_902)[name = tensor("transpose_73")]; tensor transpose_52 = transpose(perm = transpose_52_perm_0, x = var_897)[name = tensor("transpose_74")]; tensor scores_33 = matmul(transpose_x = scores_33_transpose_x_0, transpose_y = scores_33_transpose_y_0, x = transpose_52, y = transpose_53)[name = tensor("scores_33")]; tensor scores_35 = add(x = scores_33, y = position_bias)[name = tensor("scores_35")]; tensor var_913 = softmax(axis = var_21, x = scores_35)[name = tensor("op_913")]; tensor states_71_transpose_x_0 = const()[name = tensor("states_71_transpose_x_0"), val = tensor(false)]; tensor states_71_transpose_y_0 = const()[name = tensor("states_71_transpose_y_0"), val = tensor(false)]; tensor value_states_17 = transpose(perm = value_states_17_perm_0, x = var_907)[name = tensor("transpose_75")]; tensor states_71 = matmul(transpose_x = states_71_transpose_x_0, transpose_y = states_71_transpose_y_0, x = var_913, y = value_states_17)[name = tensor("states_71")]; tensor var_917_perm_0 = const()[name = tensor("op_917_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_919 = const()[name = tensor("op_919"), val = tensor([1, -1, 384])]; tensor var_917 = transpose(perm = var_917_perm_0, x = states_71)[name = tensor("transpose_72")]; tensor input_155 = reshape(shape = var_919, x = var_917)[name = tensor("input_155")]; tensor input_157 = linear(bias = linear_3_bias_0, weight = encoder_block_8_layer_0_SelfAttention_o_weight, x = input_155)[name = tensor("linear_59")]; tensor hidden_states_121 = add(x = hidden_states_113, y = input_157)[name = tensor("hidden_states_121")]; tensor var_17_promoted_17 = const()[name = tensor("op_17_promoted_17"), val = tensor(0x1p+1)]; tensor var_929 = pow(x = hidden_states_121, y = var_17_promoted_17)[name = tensor("op_929")]; tensor variance_35_axes_0 = const()[name = tensor("variance_35_axes_0"), val = tensor([-1])]; tensor variance_35_keep_dims_0 = const()[name = tensor("variance_35_keep_dims_0"), val = tensor(true)]; tensor variance_35 = reduce_mean(axes = variance_35_axes_0, keep_dims = variance_35_keep_dims_0, x = var_929)[name = tensor("variance_35")]; tensor var_932 = const()[name = tensor("op_932"), val = tensor(0x1.0c6f7ap-20)]; tensor var_933 = add(x = variance_35, y = var_932)[name = tensor("op_933")]; tensor var_934_epsilon_0 = const()[name = tensor("op_934_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_934 = rsqrt(epsilon = var_934_epsilon_0, x = var_933)[name = tensor("op_934")]; tensor hidden_states_125 = mul(x = hidden_states_121, y = var_934)[name = tensor("hidden_states_125")]; tensor input_159 = mul(x = encoder_block_8_layer_1_layer_norm_weight, y = hidden_states_125)[name = tensor("input_159")]; tensor input_161 = linear(bias = linear_4_bias_0, weight = encoder_block_8_layer_1_DenseReluDense_wi_0_weight, x = input_159)[name = tensor("linear_60")]; tensor hidden_gelu_17_mode_0 = const()[name = tensor("hidden_gelu_17_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor hidden_gelu_17 = gelu(mode = hidden_gelu_17_mode_0, x = input_161)[name = tensor("hidden_gelu_17")]; tensor hidden_linear_17 = linear(bias = linear_4_bias_0, weight = encoder_block_8_layer_1_DenseReluDense_wi_1_weight, x = input_159)[name = tensor("linear_61")]; tensor input_163 = mul(x = hidden_gelu_17, y = hidden_linear_17)[name = tensor("input_163")]; tensor input_167 = linear(bias = linear_3_bias_0, weight = encoder_block_8_layer_1_DenseReluDense_wo_weight, x = input_163)[name = tensor("linear_62")]; tensor hidden_states_127 = add(x = hidden_states_121, y = input_167)[name = tensor("hidden_states_127")]; tensor var_17_promoted_18 = const()[name = tensor("op_17_promoted_18"), val = tensor(0x1p+1)]; tensor var_973 = pow(x = hidden_states_127, y = var_17_promoted_18)[name = tensor("op_973")]; tensor variance_37_axes_0 = const()[name = tensor("variance_37_axes_0"), val = tensor([-1])]; tensor variance_37_keep_dims_0 = const()[name = tensor("variance_37_keep_dims_0"), val = tensor(true)]; tensor variance_37 = reduce_mean(axes = variance_37_axes_0, keep_dims = variance_37_keep_dims_0, x = var_973)[name = tensor("variance_37")]; tensor var_976 = const()[name = tensor("op_976"), val = tensor(0x1.0c6f7ap-20)]; tensor var_977 = add(x = variance_37, y = var_976)[name = tensor("op_977")]; tensor var_978_epsilon_0 = const()[name = tensor("op_978_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_978 = rsqrt(epsilon = var_978_epsilon_0, x = var_977)[name = tensor("op_978")]; tensor hidden_states_131 = mul(x = hidden_states_127, y = var_978)[name = tensor("hidden_states_131")]; tensor hidden_states_133 = mul(x = encoder_block_9_layer_0_layer_norm_weight, y = hidden_states_131)[name = tensor("hidden_states_133")]; tensor states_73 = linear(bias = linear_0_bias_0, weight = encoder_block_9_layer_0_SelfAttention_q_weight, x = hidden_states_133)[name = tensor("linear_63")]; tensor var_991 = const()[name = tensor("op_991"), val = tensor([1, -1, 6, 64])]; tensor var_992 = reshape(shape = var_991, x = states_73)[name = tensor("op_992")]; tensor states_75 = linear(bias = linear_0_bias_0, weight = encoder_block_9_layer_0_SelfAttention_k_weight, x = hidden_states_133)[name = tensor("linear_64")]; tensor var_996 = const()[name = tensor("op_996"), val = tensor([1, -1, 6, 64])]; tensor var_997 = reshape(shape = var_996, x = states_75)[name = tensor("op_997")]; tensor states_77 = linear(bias = linear_0_bias_0, weight = encoder_block_9_layer_0_SelfAttention_v_weight, x = hidden_states_133)[name = tensor("linear_65")]; tensor var_1001 = const()[name = tensor("op_1001"), val = tensor([1, -1, 6, 64])]; tensor var_1002 = reshape(shape = var_1001, x = states_77)[name = tensor("op_1002")]; tensor value_states_19_perm_0 = const()[name = tensor("value_states_19_perm_0"), val = tensor([0, 2, 1, 3])]; tensor scores_37_transpose_x_0 = const()[name = tensor("scores_37_transpose_x_0"), val = tensor(false)]; tensor scores_37_transpose_y_0 = const()[name = tensor("scores_37_transpose_y_0"), val = tensor(false)]; tensor transpose_54_perm_0 = const()[name = tensor("transpose_54_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_55_perm_0 = const()[name = tensor("transpose_55_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_55 = transpose(perm = transpose_55_perm_0, x = var_997)[name = tensor("transpose_69")]; tensor transpose_54 = transpose(perm = transpose_54_perm_0, x = var_992)[name = tensor("transpose_70")]; tensor scores_37 = matmul(transpose_x = scores_37_transpose_x_0, transpose_y = scores_37_transpose_y_0, x = transpose_54, y = transpose_55)[name = tensor("scores_37")]; tensor scores_39 = add(x = scores_37, y = position_bias)[name = tensor("scores_39")]; tensor var_1008 = softmax(axis = var_21, x = scores_39)[name = tensor("op_1008")]; tensor states_79_transpose_x_0 = const()[name = tensor("states_79_transpose_x_0"), val = tensor(false)]; tensor states_79_transpose_y_0 = const()[name = tensor("states_79_transpose_y_0"), val = tensor(false)]; tensor value_states_19 = transpose(perm = value_states_19_perm_0, x = var_1002)[name = tensor("transpose_71")]; tensor states_79 = matmul(transpose_x = states_79_transpose_x_0, transpose_y = states_79_transpose_y_0, x = var_1008, y = value_states_19)[name = tensor("states_79")]; tensor var_1012_perm_0 = const()[name = tensor("op_1012_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1014 = const()[name = tensor("op_1014"), val = tensor([1, -1, 384])]; tensor var_1012 = transpose(perm = var_1012_perm_0, x = states_79)[name = tensor("transpose_68")]; tensor input_173 = reshape(shape = var_1014, x = var_1012)[name = tensor("input_173")]; tensor input_175 = linear(bias = linear_3_bias_0, weight = encoder_block_9_layer_0_SelfAttention_o_weight, x = input_173)[name = tensor("linear_66")]; tensor hidden_states_135 = add(x = hidden_states_127, y = input_175)[name = tensor("hidden_states_135")]; tensor var_17_promoted_19 = const()[name = tensor("op_17_promoted_19"), val = tensor(0x1p+1)]; tensor var_1024 = pow(x = hidden_states_135, y = var_17_promoted_19)[name = tensor("op_1024")]; tensor variance_39_axes_0 = const()[name = tensor("variance_39_axes_0"), val = tensor([-1])]; tensor variance_39_keep_dims_0 = const()[name = tensor("variance_39_keep_dims_0"), val = tensor(true)]; tensor variance_39 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = var_1024)[name = tensor("variance_39")]; tensor var_1027 = const()[name = tensor("op_1027"), val = tensor(0x1.0c6f7ap-20)]; tensor var_1028 = add(x = variance_39, y = var_1027)[name = tensor("op_1028")]; tensor var_1029_epsilon_0 = const()[name = tensor("op_1029_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_1029 = rsqrt(epsilon = var_1029_epsilon_0, x = var_1028)[name = tensor("op_1029")]; tensor hidden_states_139 = mul(x = hidden_states_135, y = var_1029)[name = tensor("hidden_states_139")]; tensor input_177 = mul(x = encoder_block_9_layer_1_layer_norm_weight, y = hidden_states_139)[name = tensor("input_177")]; tensor input_179 = linear(bias = linear_4_bias_0, weight = encoder_block_9_layer_1_DenseReluDense_wi_0_weight, x = input_177)[name = tensor("linear_67")]; tensor hidden_gelu_19_mode_0 = const()[name = tensor("hidden_gelu_19_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor hidden_gelu_19 = gelu(mode = hidden_gelu_19_mode_0, x = input_179)[name = tensor("hidden_gelu_19")]; tensor hidden_linear_19 = linear(bias = linear_4_bias_0, weight = encoder_block_9_layer_1_DenseReluDense_wi_1_weight, x = input_177)[name = tensor("linear_68")]; tensor input_181 = mul(x = hidden_gelu_19, y = hidden_linear_19)[name = tensor("input_181")]; tensor input_185 = linear(bias = linear_3_bias_0, weight = encoder_block_9_layer_1_DenseReluDense_wo_weight, x = input_181)[name = tensor("linear_69")]; tensor hidden_states_141 = add(x = hidden_states_135, y = input_185)[name = tensor("hidden_states_141")]; tensor var_17_promoted_20 = const()[name = tensor("op_17_promoted_20"), val = tensor(0x1p+1)]; tensor var_1068 = pow(x = hidden_states_141, y = var_17_promoted_20)[name = tensor("op_1068")]; tensor variance_41_axes_0 = const()[name = tensor("variance_41_axes_0"), val = tensor([-1])]; tensor variance_41_keep_dims_0 = const()[name = tensor("variance_41_keep_dims_0"), val = tensor(true)]; tensor variance_41 = reduce_mean(axes = variance_41_axes_0, keep_dims = variance_41_keep_dims_0, x = var_1068)[name = tensor("variance_41")]; tensor var_1071 = const()[name = tensor("op_1071"), val = tensor(0x1.0c6f7ap-20)]; tensor var_1072 = add(x = variance_41, y = var_1071)[name = tensor("op_1072")]; tensor var_1073_epsilon_0 = const()[name = tensor("op_1073_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_1073 = rsqrt(epsilon = var_1073_epsilon_0, x = var_1072)[name = tensor("op_1073")]; tensor hidden_states_145 = mul(x = hidden_states_141, y = var_1073)[name = tensor("hidden_states_145")]; tensor hidden_states_147 = mul(x = encoder_block_10_layer_0_layer_norm_weight, y = hidden_states_145)[name = tensor("hidden_states_147")]; tensor states_81 = linear(bias = linear_0_bias_0, weight = encoder_block_10_layer_0_SelfAttention_q_weight, x = hidden_states_147)[name = tensor("linear_70")]; tensor var_1086 = const()[name = tensor("op_1086"), val = tensor([1, -1, 6, 64])]; tensor var_1087 = reshape(shape = var_1086, x = states_81)[name = tensor("op_1087")]; tensor states_83 = linear(bias = linear_0_bias_0, weight = encoder_block_10_layer_0_SelfAttention_k_weight, x = hidden_states_147)[name = tensor("linear_71")]; tensor var_1091 = const()[name = tensor("op_1091"), val = tensor([1, -1, 6, 64])]; tensor var_1092 = reshape(shape = var_1091, x = states_83)[name = tensor("op_1092")]; tensor states_85 = linear(bias = linear_0_bias_0, weight = encoder_block_10_layer_0_SelfAttention_v_weight, x = hidden_states_147)[name = tensor("linear_72")]; tensor var_1096 = const()[name = tensor("op_1096"), val = tensor([1, -1, 6, 64])]; tensor var_1097 = reshape(shape = var_1096, x = states_85)[name = tensor("op_1097")]; tensor value_states_21_perm_0 = const()[name = tensor("value_states_21_perm_0"), val = tensor([0, 2, 1, 3])]; tensor scores_41_transpose_x_0 = const()[name = tensor("scores_41_transpose_x_0"), val = tensor(false)]; tensor scores_41_transpose_y_0 = const()[name = tensor("scores_41_transpose_y_0"), val = tensor(false)]; tensor transpose_56_perm_0 = const()[name = tensor("transpose_56_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_57_perm_0 = const()[name = tensor("transpose_57_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_57 = transpose(perm = transpose_57_perm_0, x = var_1092)[name = tensor("transpose_65")]; tensor transpose_56 = transpose(perm = transpose_56_perm_0, x = var_1087)[name = tensor("transpose_66")]; tensor scores_41 = matmul(transpose_x = scores_41_transpose_x_0, transpose_y = scores_41_transpose_y_0, x = transpose_56, y = transpose_57)[name = tensor("scores_41")]; tensor scores_43 = add(x = scores_41, y = position_bias)[name = tensor("scores_43")]; tensor var_1103 = softmax(axis = var_21, x = scores_43)[name = tensor("op_1103")]; tensor states_87_transpose_x_0 = const()[name = tensor("states_87_transpose_x_0"), val = tensor(false)]; tensor states_87_transpose_y_0 = const()[name = tensor("states_87_transpose_y_0"), val = tensor(false)]; tensor value_states_21 = transpose(perm = value_states_21_perm_0, x = var_1097)[name = tensor("transpose_67")]; tensor states_87 = matmul(transpose_x = states_87_transpose_x_0, transpose_y = states_87_transpose_y_0, x = var_1103, y = value_states_21)[name = tensor("states_87")]; tensor var_1107_perm_0 = const()[name = tensor("op_1107_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1109 = const()[name = tensor("op_1109"), val = tensor([1, -1, 384])]; tensor var_1107 = transpose(perm = var_1107_perm_0, x = states_87)[name = tensor("transpose_64")]; tensor input_191 = reshape(shape = var_1109, x = var_1107)[name = tensor("input_191")]; tensor input_193 = linear(bias = linear_3_bias_0, weight = encoder_block_10_layer_0_SelfAttention_o_weight, x = input_191)[name = tensor("linear_73")]; tensor hidden_states_149 = add(x = hidden_states_141, y = input_193)[name = tensor("hidden_states_149")]; tensor var_17_promoted_21 = const()[name = tensor("op_17_promoted_21"), val = tensor(0x1p+1)]; tensor var_1119 = pow(x = hidden_states_149, y = var_17_promoted_21)[name = tensor("op_1119")]; tensor variance_43_axes_0 = const()[name = tensor("variance_43_axes_0"), val = tensor([-1])]; tensor variance_43_keep_dims_0 = const()[name = tensor("variance_43_keep_dims_0"), val = tensor(true)]; tensor variance_43 = reduce_mean(axes = variance_43_axes_0, keep_dims = variance_43_keep_dims_0, x = var_1119)[name = tensor("variance_43")]; tensor var_1122 = const()[name = tensor("op_1122"), val = tensor(0x1.0c6f7ap-20)]; tensor var_1123 = add(x = variance_43, y = var_1122)[name = tensor("op_1123")]; tensor var_1124_epsilon_0 = const()[name = tensor("op_1124_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_1124 = rsqrt(epsilon = var_1124_epsilon_0, x = var_1123)[name = tensor("op_1124")]; tensor hidden_states_153 = mul(x = hidden_states_149, y = var_1124)[name = tensor("hidden_states_153")]; tensor input_195 = mul(x = encoder_block_10_layer_1_layer_norm_weight, y = hidden_states_153)[name = tensor("input_195")]; tensor input_197 = linear(bias = linear_4_bias_0, weight = encoder_block_10_layer_1_DenseReluDense_wi_0_weight, x = input_195)[name = tensor("linear_74")]; tensor hidden_gelu_21_mode_0 = const()[name = tensor("hidden_gelu_21_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor hidden_gelu_21 = gelu(mode = hidden_gelu_21_mode_0, x = input_197)[name = tensor("hidden_gelu_21")]; tensor hidden_linear_21 = linear(bias = linear_4_bias_0, weight = encoder_block_10_layer_1_DenseReluDense_wi_1_weight, x = input_195)[name = tensor("linear_75")]; tensor input_199 = mul(x = hidden_gelu_21, y = hidden_linear_21)[name = tensor("input_199")]; tensor input_203 = linear(bias = linear_3_bias_0, weight = encoder_block_10_layer_1_DenseReluDense_wo_weight, x = input_199)[name = tensor("linear_76")]; tensor hidden_states_155 = add(x = hidden_states_149, y = input_203)[name = tensor("hidden_states_155")]; tensor var_17_promoted_22 = const()[name = tensor("op_17_promoted_22"), val = tensor(0x1p+1)]; tensor var_1163 = pow(x = hidden_states_155, y = var_17_promoted_22)[name = tensor("op_1163")]; tensor variance_45_axes_0 = const()[name = tensor("variance_45_axes_0"), val = tensor([-1])]; tensor variance_45_keep_dims_0 = const()[name = tensor("variance_45_keep_dims_0"), val = tensor(true)]; tensor variance_45 = reduce_mean(axes = variance_45_axes_0, keep_dims = variance_45_keep_dims_0, x = var_1163)[name = tensor("variance_45")]; tensor var_1166 = const()[name = tensor("op_1166"), val = tensor(0x1.0c6f7ap-20)]; tensor var_1167 = add(x = variance_45, y = var_1166)[name = tensor("op_1167")]; tensor var_1168_epsilon_0 = const()[name = tensor("op_1168_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_1168 = rsqrt(epsilon = var_1168_epsilon_0, x = var_1167)[name = tensor("op_1168")]; tensor hidden_states_159 = mul(x = hidden_states_155, y = var_1168)[name = tensor("hidden_states_159")]; tensor hidden_states_161 = mul(x = encoder_block_11_layer_0_layer_norm_weight, y = hidden_states_159)[name = tensor("hidden_states_161")]; tensor states_89 = linear(bias = linear_0_bias_0, weight = encoder_block_11_layer_0_SelfAttention_q_weight, x = hidden_states_161)[name = tensor("linear_77")]; tensor var_1181 = const()[name = tensor("op_1181"), val = tensor([1, -1, 6, 64])]; tensor var_1182 = reshape(shape = var_1181, x = states_89)[name = tensor("op_1182")]; tensor states_91 = linear(bias = linear_0_bias_0, weight = encoder_block_11_layer_0_SelfAttention_k_weight, x = hidden_states_161)[name = tensor("linear_78")]; tensor var_1186 = const()[name = tensor("op_1186"), val = tensor([1, -1, 6, 64])]; tensor var_1187 = reshape(shape = var_1186, x = states_91)[name = tensor("op_1187")]; tensor states_93 = linear(bias = linear_0_bias_0, weight = encoder_block_11_layer_0_SelfAttention_v_weight, x = hidden_states_161)[name = tensor("linear_79")]; tensor var_1191 = const()[name = tensor("op_1191"), val = tensor([1, -1, 6, 64])]; tensor var_1192 = reshape(shape = var_1191, x = states_93)[name = tensor("op_1192")]; tensor value_states_perm_0 = const()[name = tensor("value_states_perm_0"), val = tensor([0, 2, 1, 3])]; tensor scores_45_transpose_x_0 = const()[name = tensor("scores_45_transpose_x_0"), val = tensor(false)]; tensor scores_45_transpose_y_0 = const()[name = tensor("scores_45_transpose_y_0"), val = tensor(false)]; tensor transpose_58_perm_0 = const()[name = tensor("transpose_58_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_59_perm_0 = const()[name = tensor("transpose_59_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_59 = transpose(perm = transpose_59_perm_0, x = var_1187)[name = tensor("transpose_61")]; tensor transpose_58 = transpose(perm = transpose_58_perm_0, x = var_1182)[name = tensor("transpose_62")]; tensor scores_45 = matmul(transpose_x = scores_45_transpose_x_0, transpose_y = scores_45_transpose_y_0, x = transpose_58, y = transpose_59)[name = tensor("scores_45")]; tensor scores = add(x = scores_45, y = position_bias)[name = tensor("scores")]; tensor var_1198 = softmax(axis = var_21, x = scores)[name = tensor("op_1198")]; tensor states_transpose_x_0 = const()[name = tensor("states_transpose_x_0"), val = tensor(false)]; tensor states_transpose_y_0 = const()[name = tensor("states_transpose_y_0"), val = tensor(false)]; tensor value_states = transpose(perm = value_states_perm_0, x = var_1192)[name = tensor("transpose_63")]; tensor states = matmul(transpose_x = states_transpose_x_0, transpose_y = states_transpose_y_0, x = var_1198, y = value_states)[name = tensor("states")]; tensor var_1202_perm_0 = const()[name = tensor("op_1202_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1204 = const()[name = tensor("op_1204"), val = tensor([1, -1, 384])]; tensor var_1202 = transpose(perm = var_1202_perm_0, x = states)[name = tensor("transpose_60")]; tensor input_209 = reshape(shape = var_1204, x = var_1202)[name = tensor("input_209")]; tensor input_211 = linear(bias = linear_3_bias_0, weight = encoder_block_11_layer_0_SelfAttention_o_weight, x = input_209)[name = tensor("linear_80")]; tensor hidden_states_163 = add(x = hidden_states_155, y = input_211)[name = tensor("hidden_states_163")]; tensor var_17_promoted_23 = const()[name = tensor("op_17_promoted_23"), val = tensor(0x1p+1)]; tensor var_1214 = pow(x = hidden_states_163, y = var_17_promoted_23)[name = tensor("op_1214")]; tensor variance_47_axes_0 = const()[name = tensor("variance_47_axes_0"), val = tensor([-1])]; tensor variance_47_keep_dims_0 = const()[name = tensor("variance_47_keep_dims_0"), val = tensor(true)]; tensor variance_47 = reduce_mean(axes = variance_47_axes_0, keep_dims = variance_47_keep_dims_0, x = var_1214)[name = tensor("variance_47")]; tensor var_1217 = const()[name = tensor("op_1217"), val = tensor(0x1.0c6f7ap-20)]; tensor var_1218 = add(x = variance_47, y = var_1217)[name = tensor("op_1218")]; tensor var_1219_epsilon_0 = const()[name = tensor("op_1219_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_1219 = rsqrt(epsilon = var_1219_epsilon_0, x = var_1218)[name = tensor("op_1219")]; tensor hidden_states_167 = mul(x = hidden_states_163, y = var_1219)[name = tensor("hidden_states_167")]; tensor input_213 = mul(x = encoder_block_11_layer_1_layer_norm_weight, y = hidden_states_167)[name = tensor("input_213")]; tensor input_215 = linear(bias = linear_4_bias_0, weight = encoder_block_11_layer_1_DenseReluDense_wi_0_weight, x = input_213)[name = tensor("linear_81")]; tensor hidden_gelu_mode_0 = const()[name = tensor("hidden_gelu_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor hidden_gelu = gelu(mode = hidden_gelu_mode_0, x = input_215)[name = tensor("hidden_gelu")]; tensor hidden_linear = linear(bias = linear_4_bias_0, weight = encoder_block_11_layer_1_DenseReluDense_wi_1_weight, x = input_213)[name = tensor("linear_82")]; tensor input_217 = mul(x = hidden_gelu, y = hidden_linear)[name = tensor("input_217")]; tensor input_221 = linear(bias = linear_3_bias_0, weight = encoder_block_11_layer_1_DenseReluDense_wo_weight, x = input_217)[name = tensor("linear_83")]; tensor hidden_states_169 = add(x = hidden_states_163, y = input_221)[name = tensor("hidden_states_169")]; tensor var_17_promoted_24 = const()[name = tensor("op_17_promoted_24"), val = tensor(0x1p+1)]; tensor var_1252 = pow(x = hidden_states_169, y = var_17_promoted_24)[name = tensor("op_1252")]; tensor variance_axes_0 = const()[name = tensor("variance_axes_0"), val = tensor([-1])]; tensor variance_keep_dims_0 = const()[name = tensor("variance_keep_dims_0"), val = tensor(true)]; tensor variance = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = var_1252)[name = tensor("variance")]; tensor var_1255 = const()[name = tensor("op_1255"), val = tensor(0x1.0c6f7ap-20)]; tensor var_1256 = add(x = variance, y = var_1255)[name = tensor("op_1256")]; tensor var_1257_epsilon_0 = const()[name = tensor("op_1257_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_1257 = rsqrt(epsilon = var_1257_epsilon_0, x = var_1256)[name = tensor("op_1257")]; tensor hidden_states = mul(x = hidden_states_169, y = var_1257)[name = tensor("hidden_states")]; tensor last_hidden_state = mul(x = encoder_final_layer_norm_weight, y = hidden_states)[name = tensor("input")]; } -> (last_hidden_state); }