program(1.0) [buildInfo = dict, tensor>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}})] { func main(tensor cache_length, tensor input_embeds, tensor key_cache, tensor key_padding_mask, tensor kv_cache_update_mask, tensor value_cache) { tensor cast_0_dtype_0 = const()[name = tensor("cast_0_dtype_0"), val = tensor("fp32")]; tensor cast_1_dtype_0 = const()[name = tensor("cast_1_dtype_0"), val = tensor("fp32")]; tensor cast_2_dtype_0 = const()[name = tensor("cast_2_dtype_0"), val = tensor("fp32")]; tensor cast_3_dtype_0 = const()[name = tensor("cast_3_dtype_0"), val = tensor("fp32")]; tensor cast_4_dtype_0 = const()[name = tensor("cast_4_dtype_0"), val = tensor("fp32")]; tensor layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2097280))), name = tensor("layers_0_self_attn_q_proj_weight_palettized"), shape = tensor([2048, 1024])]; tensor layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2098368))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3147008))), name = tensor("layers_0_self_attn_k_proj_weight_palettized"), shape = tensor([1024, 1024])]; tensor layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3148096))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4196736))), name = tensor("layers_0_self_attn_v_proj_weight_palettized"), shape = tensor([1024, 1024])]; tensor layers_0_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4197824))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6295040))), name = tensor("layers_0_self_attn_o_proj_weight_palettized"), shape = tensor([1024, 2048])]; tensor layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6296128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9441920))), name = tensor("layers_0_mlp_gate_proj_weight_palettized"), shape = tensor([3072, 1024])]; tensor layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9443008))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12588800))), name = tensor("layers_0_mlp_up_proj_weight_palettized"), shape = tensor([3072, 1024])]; tensor layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12589888))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15735680))), name = tensor("layers_0_mlp_down_proj_weight_palettized"), shape = tensor([1024, 3072])]; tensor layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15736768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17833984))), name = tensor("layers_1_self_attn_q_proj_weight_palettized"), shape = tensor([2048, 1024])]; tensor layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17835072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18883712))), name = tensor("layers_1_self_attn_k_proj_weight_palettized"), shape = tensor([1024, 1024])]; tensor layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18884800))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19933440))), name = tensor("layers_1_self_attn_v_proj_weight_palettized"), shape = tensor([1024, 1024])]; tensor layers_1_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19934528))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22031744))), name = tensor("layers_1_self_attn_o_proj_weight_palettized"), shape = tensor([1024, 2048])]; tensor layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22032832))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25178624))), name = tensor("layers_1_mlp_gate_proj_weight_palettized"), shape = tensor([3072, 1024])]; tensor layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25179712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28325504))), name = tensor("layers_1_mlp_up_proj_weight_palettized"), shape = tensor([3072, 1024])]; tensor layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28326592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31472384))), name = tensor("layers_1_mlp_down_proj_weight_palettized"), shape = tensor([1024, 3072])]; tensor layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31473472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33570688))), name = tensor("layers_2_self_attn_q_proj_weight_palettized"), shape = tensor([2048, 1024])]; tensor layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33571776))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34620416))), name = tensor("layers_2_self_attn_k_proj_weight_palettized"), shape = tensor([1024, 1024])]; tensor layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34621504))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35670144))), name = tensor("layers_2_self_attn_v_proj_weight_palettized"), shape = tensor([1024, 1024])]; tensor layers_2_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35671232))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37768448))), name = tensor("layers_2_self_attn_o_proj_weight_palettized"), shape = tensor([1024, 2048])]; tensor layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37769536))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40915328))), name = tensor("layers_2_mlp_gate_proj_weight_palettized"), shape = tensor([3072, 1024])]; tensor layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40916416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44062208))), name = tensor("layers_2_mlp_up_proj_weight_palettized"), shape = tensor([3072, 1024])]; tensor layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44063296))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47209088))), name = tensor("layers_2_mlp_down_proj_weight_palettized"), shape = tensor([1024, 3072])]; tensor layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47210176))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49307392))), name = tensor("layers_3_self_attn_q_proj_weight_palettized"), shape = tensor([2048, 1024])]; tensor layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49308480))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50357120))), name = tensor("layers_3_self_attn_k_proj_weight_palettized"), shape = tensor([1024, 1024])]; tensor layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50358208))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51406848))), name = tensor("layers_3_self_attn_v_proj_weight_palettized"), shape = tensor([1024, 1024])]; tensor layers_3_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51407936))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53505152))), name = tensor("layers_3_self_attn_o_proj_weight_palettized"), shape = tensor([1024, 2048])]; tensor layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53506240))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56652032))), name = tensor("layers_3_mlp_gate_proj_weight_palettized"), shape = tensor([3072, 1024])]; tensor layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56653120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59798912))), name = tensor("layers_3_mlp_up_proj_weight_palettized"), shape = tensor([3072, 1024])]; tensor layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59800000))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62945792))), name = tensor("layers_3_mlp_down_proj_weight_palettized"), shape = tensor([1024, 3072])]; tensor layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62946880))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65044096))), name = tensor("layers_4_self_attn_q_proj_weight_palettized"), shape = tensor([2048, 1024])]; tensor layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65045184))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66093824))), name = tensor("layers_4_self_attn_k_proj_weight_palettized"), shape = tensor([1024, 1024])]; tensor layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66094912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67143552))), name = tensor("layers_4_self_attn_v_proj_weight_palettized"), shape = tensor([1024, 1024])]; tensor layers_4_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67144640))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69241856))), name = tensor("layers_4_self_attn_o_proj_weight_palettized"), shape = tensor([1024, 2048])]; tensor layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69242944))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72388736))), name = tensor("layers_4_mlp_gate_proj_weight_palettized"), shape = tensor([3072, 1024])]; tensor layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72389824))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(75535616))), name = tensor("layers_4_mlp_up_proj_weight_palettized"), shape = tensor([3072, 1024])]; tensor layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(75536704))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78682496))), name = tensor("layers_4_mlp_down_proj_weight_palettized"), shape = tensor([1024, 3072])]; tensor lm_heads_0_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78683584))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80780800))), name = tensor("lm_heads_0_weight_palettized"), shape = tensor([2048, 1024])]; tensor lm_heads_1_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80781888))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82879104))), name = tensor("lm_heads_1_weight_palettized"), shape = tensor([2048, 1024])]; tensor lm_heads_2_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82880192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84977408))), name = tensor("lm_heads_2_weight_palettized"), shape = tensor([2048, 1024])]; tensor lm_heads_3_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84978496))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87075712))), name = tensor("lm_heads_3_weight_palettized"), shape = tensor([2048, 1024])]; tensor lm_heads_4_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87076800))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89174016))), name = tensor("lm_heads_4_weight_palettized"), shape = tensor([2048, 1024])]; tensor lm_heads_5_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89175104))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91272320))), name = tensor("lm_heads_5_weight_palettized"), shape = tensor([2048, 1024])]; tensor lm_heads_6_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91273408))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93370624))), name = tensor("lm_heads_6_weight_palettized"), shape = tensor([2048, 1024])]; tensor lm_heads_7_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93371712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95468928))), name = tensor("lm_heads_7_weight_palettized"), shape = tensor([2048, 1024])]; tensor lm_heads_8_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95470016))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97567232))), name = tensor("lm_heads_8_weight_palettized"), shape = tensor([2048, 1024])]; tensor lm_heads_9_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97568320))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99665536))), name = tensor("lm_heads_9_weight_palettized"), shape = tensor([2048, 1024])]; tensor lm_heads_10_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99666624))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101763840))), name = tensor("lm_heads_10_weight_palettized"), shape = tensor([2048, 1024])]; tensor lm_heads_11_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101764928))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103862144))), name = tensor("lm_heads_11_weight_palettized"), shape = tensor([2048, 1024])]; tensor lm_heads_12_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103863232))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105960448))), name = tensor("lm_heads_12_weight_palettized"), shape = tensor([2048, 1024])]; tensor lm_heads_13_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105961536))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108058752))), name = tensor("lm_heads_13_weight_palettized"), shape = tensor([2048, 1024])]; tensor lm_heads_14_weight_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108059840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110157056))), name = tensor("lm_heads_14_weight_palettized"), shape = tensor([2048, 1024])]; tensor var_205_axes_0 = const()[name = tensor("op_205_axes_0"), val = tensor([0])]; tensor var_205 = expand_dims(axes = var_205_axes_0, x = cache_length)[name = tensor("op_205")]; tensor pos_dtype_0 = const()[name = tensor("pos_dtype_0"), val = tensor("fp32")]; tensor const_0 = const()[name = tensor("const_0"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110158144)))]; tensor var_226_axes_0 = const()[name = tensor("op_226_axes_0"), val = tensor([-1])]; tensor pos = cast(dtype = pos_dtype_0, x = var_205)[name = tensor("cast_9")]; tensor var_226 = expand_dims(axes = var_226_axes_0, x = pos)[name = tensor("op_226")]; tensor var_227_transpose_x_0 = const()[name = tensor("op_227_transpose_x_0"), val = tensor(false)]; tensor var_227_transpose_y_0 = const()[name = tensor("op_227_transpose_y_0"), val = tensor(false)]; tensor var_227 = matmul(transpose_x = var_227_transpose_x_0, transpose_y = var_227_transpose_y_0, x = const_0, y = var_226)[name = tensor("op_227")]; tensor freqs_perm_0 = const()[name = tensor("freqs_perm_0"), val = tensor([0, 2, 1])]; tensor var_232 = const()[name = tensor("op_232"), val = tensor(-1)]; tensor emb_interleave_0 = const()[name = tensor("emb_interleave_0"), val = tensor(false)]; tensor freqs = transpose(perm = freqs_perm_0, x = var_227)[name = tensor("transpose_10")]; tensor emb = concat(axis = var_232, interleave = emb_interleave_0, values = (freqs, freqs))[name = tensor("emb")]; tensor var_234 = cos(x = emb)[name = tensor("op_234")]; tensor var_242 = sin(x = emb)[name = tensor("op_242")]; tensor var_251_axes_0 = const()[name = tensor("op_251_axes_0"), val = tensor([1])]; tensor cast_3 = cast(dtype = cast_3_dtype_0, x = kv_cache_update_mask)[name = tensor("cast_8")]; tensor var_251 = expand_dims(axes = var_251_axes_0, x = cast_3)[name = tensor("op_251")]; tensor update_mask_axes_0 = const()[name = tensor("update_mask_axes_0"), val = tensor([2])]; tensor update_mask = expand_dims(axes = update_mask_axes_0, x = var_251)[name = tensor("update_mask")]; tensor var_263_begin_0 = const()[name = tensor("op_263_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_263_end_0 = const()[name = tensor("op_263_end_0"), val = tensor([1, 1024, 1, 16])]; tensor var_263_end_mask_0 = const()[name = tensor("op_263_end_mask_0"), val = tensor([true, false, true, true])]; tensor cast_1 = cast(dtype = cast_1_dtype_0, x = key_cache)[name = tensor("cast_7")]; tensor var_263 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = cast_1)[name = tensor("op_263")]; tensor var_283_begin_0 = const()[name = tensor("op_283_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_283_end_0 = const()[name = tensor("op_283_end_0"), val = tensor([1, 1024, 1, 16])]; tensor var_283_end_mask_0 = const()[name = tensor("op_283_end_mask_0"), val = tensor([true, false, true, true])]; tensor cast_4 = cast(dtype = cast_4_dtype_0, x = value_cache)[name = tensor("cast_6")]; tensor var_283 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = cast_4)[name = tensor("op_283")]; tensor var_295_axes_0 = const()[name = tensor("op_295_axes_0"), val = tensor([-1])]; tensor cast_0 = cast(dtype = cast_0_dtype_0, x = input_embeds)[name = tensor("cast_5")]; tensor var_295 = squeeze(axes = var_295_axes_0, x = cast_0)[name = tensor("op_295")]; tensor var_297_axes_0 = const()[name = tensor("op_297_axes_0"), val = tensor([-1])]; tensor var_297 = squeeze(axes = var_297_axes_0, x = var_295)[name = tensor("op_297")]; tensor hidden_states_1_axes_0 = const()[name = tensor("hidden_states_1_axes_0"), val = tensor([0])]; tensor hidden_states_1 = expand_dims(axes = hidden_states_1_axes_0, x = var_297)[name = tensor("hidden_states_1")]; tensor var_303_promoted = const()[name = tensor("op_303_promoted"), val = tensor(0x1p+1)]; tensor var_309 = pow(x = hidden_states_1, y = var_303_promoted)[name = tensor("op_309")]; tensor variance_1_axes_0 = const()[name = tensor("variance_1_axes_0"), val = tensor([-1])]; tensor variance_1_keep_dims_0 = const()[name = tensor("variance_1_keep_dims_0"), val = tensor(true)]; tensor variance_1 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = var_309)[name = tensor("variance_1")]; tensor const_1 = const()[name = tensor("const_1"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110158464)))]; tensor var_313 = mul(x = const_1, y = hidden_states_1)[name = tensor("op_313")]; tensor var_314 = const()[name = tensor("op_314"), val = tensor(0x1.0c6f7ap-20)]; tensor var_315 = add(x = variance_1, y = var_314)[name = tensor("op_315")]; tensor var_316_epsilon_0 = const()[name = tensor("op_316_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_316 = rsqrt(epsilon = var_316_epsilon_0, x = var_315)[name = tensor("op_316")]; tensor input_1 = mul(x = var_313, y = var_316)[name = tensor("input_1")]; tensor linear_0_bias_0 = const()[name = tensor("linear_0_bias_0"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110162624)))]; tensor var_320 = linear(bias = linear_0_bias_0, weight = layers_0_self_attn_q_proj_weight_palettized, x = input_1)[name = tensor("linear_0")]; tensor var_325 = const()[name = tensor("op_325"), val = tensor([1, 1, 16, 128])]; tensor var_326 = reshape(shape = var_325, x = var_320)[name = tensor("op_326")]; tensor linear_1_bias_0 = const()[name = tensor("linear_1_bias_0"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110170880)))]; tensor var_332 = linear(bias = linear_1_bias_0, weight = layers_0_self_attn_k_proj_weight_palettized, x = input_1)[name = tensor("linear_1")]; tensor var_337 = const()[name = tensor("op_337"), val = tensor([1, 1, 8, 128])]; tensor var_338 = reshape(shape = var_337, x = var_332)[name = tensor("op_338")]; tensor var_344 = linear(bias = linear_1_bias_0, weight = layers_0_self_attn_v_proj_weight_palettized, x = input_1)[name = tensor("linear_2")]; tensor var_357_promoted = const()[name = tensor("op_357_promoted"), val = tensor(0x1p+1)]; tensor var_363 = pow(x = var_326, y = var_357_promoted)[name = tensor("op_363")]; tensor variance_3_keep_dims_0 = const()[name = tensor("variance_3_keep_dims_0"), val = tensor(true)]; tensor const_52 = const()[name = tensor("const_52"), val = tensor([3])]; tensor variance_3 = reduce_mean(axes = const_52, keep_dims = variance_3_keep_dims_0, x = var_363)[name = tensor("variance_3")]; tensor const_53 = const()[name = tensor("const_53"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110175040)))]; tensor var_367 = mul(x = const_53, y = var_326)[name = tensor("op_367")]; tensor var_368 = const()[name = tensor("op_368"), val = tensor(0x1.0c6f7ap-20)]; tensor var_369 = add(x = variance_3, y = var_368)[name = tensor("op_369")]; tensor var_370_epsilon_0 = const()[name = tensor("op_370_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_370 = rsqrt(epsilon = var_370_epsilon_0, x = var_369)[name = tensor("op_370")]; tensor q_1 = mul(x = var_367, y = var_370)[name = tensor("q_1")]; tensor var_375_promoted = const()[name = tensor("op_375_promoted"), val = tensor(0x1p+1)]; tensor var_381 = pow(x = var_338, y = var_375_promoted)[name = tensor("op_381")]; tensor variance_5_keep_dims_0 = const()[name = tensor("variance_5_keep_dims_0"), val = tensor(true)]; tensor const_54 = const()[name = tensor("const_54"), val = tensor([3])]; tensor variance_5 = reduce_mean(axes = const_54, keep_dims = variance_5_keep_dims_0, x = var_381)[name = tensor("variance_5")]; tensor const_55 = const()[name = tensor("const_55"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110175616)))]; tensor var_385 = mul(x = const_55, y = var_338)[name = tensor("op_385")]; tensor var_386 = const()[name = tensor("op_386"), val = tensor(0x1.0c6f7ap-20)]; tensor var_387 = add(x = variance_5, y = var_386)[name = tensor("op_387")]; tensor var_388_epsilon_0 = const()[name = tensor("op_388_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_388 = rsqrt(epsilon = var_388_epsilon_0, x = var_387)[name = tensor("op_388")]; tensor k_1 = mul(x = var_385, y = var_388)[name = tensor("k_1")]; tensor cos_r_1_axes_0 = const()[name = tensor("cos_r_1_axes_0"), val = tensor([1])]; tensor cos_r_1 = expand_dims(axes = cos_r_1_axes_0, x = var_234)[name = tensor("cos_r_1")]; tensor sin_r_1_axes_0 = const()[name = tensor("sin_r_1_axes_0"), val = tensor([1])]; tensor sin_r_1 = expand_dims(axes = sin_r_1_axes_0, x = var_242)[name = tensor("sin_r_1")]; tensor var_403 = mul(x = q_1, y = cos_r_1)[name = tensor("op_403")]; tensor x1_1_begin_0 = const()[name = tensor("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = tensor("x1_1_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_1_end_mask_0 = const()[name = tensor("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1)[name = tensor("x1_1")]; tensor x2_1_begin_0 = const()[name = tensor("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_1_end_0 = const()[name = tensor("x2_1_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_1_end_mask_0 = const()[name = tensor("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1)[name = tensor("x2_1")]; tensor const_6_promoted = const()[name = tensor("const_6_promoted"), val = tensor(-0x1p+0)]; tensor var_424 = mul(x = x2_1, y = const_6_promoted)[name = tensor("op_424")]; tensor var_426 = const()[name = tensor("op_426"), val = tensor(-1)]; tensor var_427_interleave_0 = const()[name = tensor("op_427_interleave_0"), val = tensor(false)]; tensor var_427 = concat(axis = var_426, interleave = var_427_interleave_0, values = (var_424, x1_1))[name = tensor("op_427")]; tensor var_428 = mul(x = var_427, y = sin_r_1)[name = tensor("op_428")]; tensor q_5 = add(x = var_403, y = var_428)[name = tensor("q_5")]; tensor var_431 = mul(x = k_1, y = cos_r_1)[name = tensor("op_431")]; tensor x1_3_begin_0 = const()[name = tensor("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = tensor("x1_3_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_3_end_mask_0 = const()[name = tensor("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1)[name = tensor("x1_3")]; tensor x2_3_begin_0 = const()[name = tensor("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_3_end_0 = const()[name = tensor("x2_3_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_3_end_mask_0 = const()[name = tensor("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1)[name = tensor("x2_3")]; tensor const_9_promoted = const()[name = tensor("const_9_promoted"), val = tensor(-0x1p+0)]; tensor var_452 = mul(x = x2_3, y = const_9_promoted)[name = tensor("op_452")]; tensor var_454 = const()[name = tensor("op_454"), val = tensor(-1)]; tensor var_455_interleave_0 = const()[name = tensor("op_455_interleave_0"), val = tensor(false)]; tensor var_455 = concat(axis = var_454, interleave = var_455_interleave_0, values = (var_452, x1_3))[name = tensor("op_455")]; tensor var_456 = mul(x = var_455, y = sin_r_1)[name = tensor("op_456")]; tensor k_5 = add(x = var_431, y = var_456)[name = tensor("k_5")]; tensor var_463 = const()[name = tensor("op_463"), val = tensor([1, 1024, 1, 1])]; tensor nk_1 = reshape(shape = var_463, x = k_5)[name = tensor("nk_1")]; tensor var_469 = const()[name = tensor("op_469"), val = tensor([1, 1024, 1, 1])]; tensor nv_1 = reshape(shape = var_469, x = var_344)[name = tensor("nv_1")]; tensor var_471 = const()[name = tensor("op_471"), val = tensor(0x1p+0)]; tensor var_473 = sub(x = var_471, y = update_mask)[name = tensor("op_473")]; tensor var_474 = mul(x = var_263, y = var_473)[name = tensor("op_474")]; tensor var_475 = mul(x = nk_1, y = update_mask)[name = tensor("op_475")]; tensor lkc_3 = add(x = var_474, y = var_475)[name = tensor("lkc_3")]; tensor var_481 = mul(x = var_283, y = var_473)[name = tensor("op_481")]; tensor var_482 = mul(x = nv_1, y = update_mask)[name = tensor("op_482")]; tensor lvc_3 = add(x = var_481, y = var_482)[name = tensor("lvc_3")]; tensor var_486_axes_0 = const()[name = tensor("op_486_axes_0"), val = tensor([2])]; tensor var_486 = squeeze(axes = var_486_axes_0, x = lkc_3)[name = tensor("op_486")]; tensor var_491 = const()[name = tensor("op_491"), val = tensor([1, 8, 128, 16])]; tensor kc_1 = reshape(shape = var_491, x = var_486)[name = tensor("kc_1")]; tensor var_494_axes_0 = const()[name = tensor("op_494_axes_0"), val = tensor([2])]; tensor var_494 = squeeze(axes = var_494_axes_0, x = lvc_3)[name = tensor("op_494")]; tensor var_499 = const()[name = tensor("op_499"), val = tensor([1, 8, 128, 16])]; tensor vc_1 = reshape(shape = var_499, x = var_494)[name = tensor("vc_1")]; tensor var_502_axes_0 = const()[name = tensor("op_502_axes_0"), val = tensor([2])]; tensor var_502 = expand_dims(axes = var_502_axes_0, x = kc_1)[name = tensor("op_502")]; tensor var_510_reps_0 = const()[name = tensor("op_510_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_510 = tile(reps = var_510_reps_0, x = var_502)[name = tensor("op_510")]; tensor var_515 = const()[name = tensor("op_515"), val = tensor([1, 16, 128, 16])]; tensor kc_3 = reshape(shape = var_515, x = var_510)[name = tensor("kc_3")]; tensor var_518_axes_0 = const()[name = tensor("op_518_axes_0"), val = tensor([2])]; tensor var_518 = expand_dims(axes = var_518_axes_0, x = vc_1)[name = tensor("op_518")]; tensor var_526_reps_0 = const()[name = tensor("op_526_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_526 = tile(reps = var_526_reps_0, x = var_518)[name = tensor("op_526")]; tensor var_531 = const()[name = tensor("op_531"), val = tensor([1, 16, 128, 16])]; tensor vc_3 = reshape(shape = var_531, x = var_526)[name = tensor("vc_3")]; tensor var_535_perm_0 = const()[name = tensor("op_535_perm_0"), val = tensor([0, 2, -3, -1])]; tensor var_536_transpose_x_0 = const()[name = tensor("op_536_transpose_x_0"), val = tensor(false)]; tensor var_536_transpose_y_0 = const()[name = tensor("op_536_transpose_y_0"), val = tensor(false)]; tensor var_535 = transpose(perm = var_535_perm_0, x = q_5)[name = tensor("transpose_9")]; tensor var_536 = matmul(transpose_x = var_536_transpose_x_0, transpose_y = var_536_transpose_y_0, x = var_535, y = kc_3)[name = tensor("op_536")]; tensor _inversed_aw_1_y_0 = const()[name = tensor("_inversed_aw_1_y_0"), val = tensor(0x1.6a09e6p-4)]; tensor _inversed_aw_1 = mul(x = var_536, y = _inversed_aw_1_y_0)[name = tensor("_inversed_aw_1")]; tensor var_540_axes_0 = const()[name = tensor("op_540_axes_0"), val = tensor([1])]; tensor cast_2 = cast(dtype = cast_2_dtype_0, x = key_padding_mask)[name = tensor("cast_4")]; tensor var_540 = expand_dims(axes = var_540_axes_0, x = cast_2)[name = tensor("op_540")]; tensor var_542_axes_0 = const()[name = tensor("op_542_axes_0"), val = tensor([2])]; tensor var_542 = expand_dims(axes = var_542_axes_0, x = var_540)[name = tensor("op_542")]; tensor aw_3 = add(x = _inversed_aw_1, y = var_542)[name = tensor("aw_3")]; tensor var_550 = const()[name = tensor("op_550"), val = tensor(-1)]; tensor aw_7 = softmax(axis = var_550, x = aw_3)[name = tensor("aw_7")]; tensor var_556_transpose_x_1 = const()[name = tensor("op_556_transpose_x_1"), val = tensor(false)]; tensor var_556_transpose_y_1 = const()[name = tensor("op_556_transpose_y_1"), val = tensor(true)]; tensor var_556 = matmul(transpose_x = var_556_transpose_x_1, transpose_y = var_556_transpose_y_1, x = aw_7, y = vc_3)[name = tensor("op_556")]; tensor var_559_perm_0 = const()[name = tensor("op_559_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_563 = const()[name = tensor("op_563"), val = tensor([1, 1, -1])]; tensor var_559 = transpose(perm = var_559_perm_0, x = var_556)[name = tensor("transpose_8")]; tensor input_3 = reshape(shape = var_563, x = var_559)[name = tensor("input_3")]; tensor var_567 = linear(bias = linear_1_bias_0, weight = layers_0_self_attn_o_proj_weight_palettized, x = input_3)[name = tensor("linear_3")]; tensor var_569_axes_0 = const()[name = tensor("op_569_axes_0"), val = tensor([0])]; tensor var_569 = squeeze(axes = var_569_axes_0, x = var_567)[name = tensor("op_569")]; tensor var_571_axes_0 = const()[name = tensor("op_571_axes_0"), val = tensor([0])]; tensor var_571 = squeeze(axes = var_571_axes_0, x = var_569)[name = tensor("op_571")]; tensor var_573_axes_0 = const()[name = tensor("op_573_axes_0"), val = tensor([-1])]; tensor var_573 = expand_dims(axes = var_573_axes_0, x = var_571)[name = tensor("op_573")]; tensor ao_1_axes_0 = const()[name = tensor("ao_1_axes_0"), val = tensor([-1])]; tensor ao_1 = expand_dims(axes = ao_1_axes_0, x = var_573)[name = tensor("ao_1")]; tensor hidden_1 = add(x = cast_0, y = ao_1)[name = tensor("hidden_1")]; tensor var_579_axes_0 = const()[name = tensor("op_579_axes_0"), val = tensor([-1])]; tensor var_579 = squeeze(axes = var_579_axes_0, x = hidden_1)[name = tensor("op_579")]; tensor var_581_axes_0 = const()[name = tensor("op_581_axes_0"), val = tensor([-1])]; tensor var_581 = squeeze(axes = var_581_axes_0, x = var_579)[name = tensor("op_581")]; tensor hidden_states_13_axes_0 = const()[name = tensor("hidden_states_13_axes_0"), val = tensor([0])]; tensor hidden_states_13 = expand_dims(axes = hidden_states_13_axes_0, x = var_581)[name = tensor("hidden_states_13")]; tensor var_587_promoted = const()[name = tensor("op_587_promoted"), val = tensor(0x1p+1)]; tensor var_593 = pow(x = hidden_states_13, y = var_587_promoted)[name = tensor("op_593")]; tensor variance_7_axes_0 = const()[name = tensor("variance_7_axes_0"), val = tensor([-1])]; tensor variance_7_keep_dims_0 = const()[name = tensor("variance_7_keep_dims_0"), val = tensor(true)]; tensor variance_7 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = var_593)[name = tensor("variance_7")]; tensor const_10 = const()[name = tensor("const_10"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110176192)))]; tensor var_597 = mul(x = const_10, y = hidden_states_13)[name = tensor("op_597")]; tensor var_598 = const()[name = tensor("op_598"), val = tensor(0x1.0c6f7ap-20)]; tensor var_599 = add(x = variance_7, y = var_598)[name = tensor("op_599")]; tensor var_600_epsilon_0 = const()[name = tensor("op_600_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_600 = rsqrt(epsilon = var_600_epsilon_0, x = var_599)[name = tensor("op_600")]; tensor input_5 = mul(x = var_597, y = var_600)[name = tensor("input_5")]; tensor linear_4_bias_0 = const()[name = tensor("linear_4_bias_0"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110180352)))]; tensor input_7 = linear(bias = linear_4_bias_0, weight = layers_0_mlp_gate_proj_weight_palettized, x = input_5)[name = tensor("linear_4")]; tensor var_608 = silu(x = input_7)[name = tensor("op_608")]; tensor var_610 = linear(bias = linear_4_bias_0, weight = layers_0_mlp_up_proj_weight_palettized, x = input_5)[name = tensor("linear_5")]; tensor input_9 = mul(x = var_608, y = var_610)[name = tensor("input_9")]; tensor var_613 = linear(bias = linear_1_bias_0, weight = layers_0_mlp_down_proj_weight_palettized, x = input_9)[name = tensor("linear_6")]; tensor var_615_axes_0 = const()[name = tensor("op_615_axes_0"), val = tensor([0])]; tensor var_615 = squeeze(axes = var_615_axes_0, x = var_613)[name = tensor("op_615")]; tensor var_617_axes_0 = const()[name = tensor("op_617_axes_0"), val = tensor([0])]; tensor var_617 = squeeze(axes = var_617_axes_0, x = var_615)[name = tensor("op_617")]; tensor var_619_axes_0 = const()[name = tensor("op_619_axes_0"), val = tensor([-1])]; tensor var_619 = expand_dims(axes = var_619_axes_0, x = var_617)[name = tensor("op_619")]; tensor h_1_axes_0 = const()[name = tensor("h_1_axes_0"), val = tensor([-1])]; tensor h_1 = expand_dims(axes = h_1_axes_0, x = var_619)[name = tensor("h_1")]; tensor hidden_3 = add(x = hidden_1, y = h_1)[name = tensor("hidden_3")]; tensor var_633_begin_0 = const()[name = tensor("op_633_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_633_end_0 = const()[name = tensor("op_633_end_0"), val = tensor([1, 2048, 1, 16])]; tensor var_633_end_mask_0 = const()[name = tensor("op_633_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_633 = slice_by_index(begin = var_633_begin_0, end = var_633_end_0, end_mask = var_633_end_mask_0, x = cast_1)[name = tensor("op_633")]; tensor var_653_begin_0 = const()[name = tensor("op_653_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_653_end_0 = const()[name = tensor("op_653_end_0"), val = tensor([1, 2048, 1, 16])]; tensor var_653_end_mask_0 = const()[name = tensor("op_653_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_653 = slice_by_index(begin = var_653_begin_0, end = var_653_end_0, end_mask = var_653_end_mask_0, x = cast_4)[name = tensor("op_653")]; tensor var_665_axes_0 = const()[name = tensor("op_665_axes_0"), val = tensor([-1])]; tensor var_665 = squeeze(axes = var_665_axes_0, x = hidden_3)[name = tensor("op_665")]; tensor var_667_axes_0 = const()[name = tensor("op_667_axes_0"), val = tensor([-1])]; tensor var_667 = squeeze(axes = var_667_axes_0, x = var_665)[name = tensor("op_667")]; tensor hidden_states_17_axes_0 = const()[name = tensor("hidden_states_17_axes_0"), val = tensor([0])]; tensor hidden_states_17 = expand_dims(axes = hidden_states_17_axes_0, x = var_667)[name = tensor("hidden_states_17")]; tensor var_673_promoted = const()[name = tensor("op_673_promoted"), val = tensor(0x1p+1)]; tensor var_679 = pow(x = hidden_states_17, y = var_673_promoted)[name = tensor("op_679")]; tensor variance_9_axes_0 = const()[name = tensor("variance_9_axes_0"), val = tensor([-1])]; tensor variance_9_keep_dims_0 = const()[name = tensor("variance_9_keep_dims_0"), val = tensor(true)]; tensor variance_9 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = var_679)[name = tensor("variance_9")]; tensor const_11 = const()[name = tensor("const_11"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110192704)))]; tensor var_683 = mul(x = const_11, y = hidden_states_17)[name = tensor("op_683")]; tensor var_684 = const()[name = tensor("op_684"), val = tensor(0x1.0c6f7ap-20)]; tensor var_685 = add(x = variance_9, y = var_684)[name = tensor("op_685")]; tensor var_686_epsilon_0 = const()[name = tensor("op_686_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_686 = rsqrt(epsilon = var_686_epsilon_0, x = var_685)[name = tensor("op_686")]; tensor input_11 = mul(x = var_683, y = var_686)[name = tensor("input_11")]; tensor var_690 = linear(bias = linear_0_bias_0, weight = layers_1_self_attn_q_proj_weight_palettized, x = input_11)[name = tensor("linear_7")]; tensor var_695 = const()[name = tensor("op_695"), val = tensor([1, 1, 16, 128])]; tensor var_696 = reshape(shape = var_695, x = var_690)[name = tensor("op_696")]; tensor var_702 = linear(bias = linear_1_bias_0, weight = layers_1_self_attn_k_proj_weight_palettized, x = input_11)[name = tensor("linear_8")]; tensor var_707 = const()[name = tensor("op_707"), val = tensor([1, 1, 8, 128])]; tensor var_708 = reshape(shape = var_707, x = var_702)[name = tensor("op_708")]; tensor var_714 = linear(bias = linear_1_bias_0, weight = layers_1_self_attn_v_proj_weight_palettized, x = input_11)[name = tensor("linear_9")]; tensor var_727_promoted = const()[name = tensor("op_727_promoted"), val = tensor(0x1p+1)]; tensor var_733 = pow(x = var_696, y = var_727_promoted)[name = tensor("op_733")]; tensor variance_11_keep_dims_0 = const()[name = tensor("variance_11_keep_dims_0"), val = tensor(true)]; tensor const_56 = const()[name = tensor("const_56"), val = tensor([3])]; tensor variance_11 = reduce_mean(axes = const_56, keep_dims = variance_11_keep_dims_0, x = var_733)[name = tensor("variance_11")]; tensor const_57 = const()[name = tensor("const_57"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110196864)))]; tensor var_737 = mul(x = const_57, y = var_696)[name = tensor("op_737")]; tensor var_738 = const()[name = tensor("op_738"), val = tensor(0x1.0c6f7ap-20)]; tensor var_739 = add(x = variance_11, y = var_738)[name = tensor("op_739")]; tensor var_740_epsilon_0 = const()[name = tensor("op_740_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_740 = rsqrt(epsilon = var_740_epsilon_0, x = var_739)[name = tensor("op_740")]; tensor q_7 = mul(x = var_737, y = var_740)[name = tensor("q_7")]; tensor var_745_promoted = const()[name = tensor("op_745_promoted"), val = tensor(0x1p+1)]; tensor var_751 = pow(x = var_708, y = var_745_promoted)[name = tensor("op_751")]; tensor variance_13_keep_dims_0 = const()[name = tensor("variance_13_keep_dims_0"), val = tensor(true)]; tensor const_58 = const()[name = tensor("const_58"), val = tensor([3])]; tensor variance_13 = reduce_mean(axes = const_58, keep_dims = variance_13_keep_dims_0, x = var_751)[name = tensor("variance_13")]; tensor const_59 = const()[name = tensor("const_59"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110197440)))]; tensor var_755 = mul(x = const_59, y = var_708)[name = tensor("op_755")]; tensor var_756 = const()[name = tensor("op_756"), val = tensor(0x1.0c6f7ap-20)]; tensor var_757 = add(x = variance_13, y = var_756)[name = tensor("op_757")]; tensor var_758_epsilon_0 = const()[name = tensor("op_758_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_758 = rsqrt(epsilon = var_758_epsilon_0, x = var_757)[name = tensor("op_758")]; tensor k_7 = mul(x = var_755, y = var_758)[name = tensor("k_7")]; tensor var_773 = mul(x = q_7, y = cos_r_1)[name = tensor("op_773")]; tensor x1_5_begin_0 = const()[name = tensor("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = tensor("x1_5_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_5_end_mask_0 = const()[name = tensor("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_7)[name = tensor("x1_5")]; tensor x2_5_begin_0 = const()[name = tensor("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_5_end_0 = const()[name = tensor("x2_5_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_5_end_mask_0 = const()[name = tensor("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_7)[name = tensor("x2_5")]; tensor const_16_promoted = const()[name = tensor("const_16_promoted"), val = tensor(-0x1p+0)]; tensor var_794 = mul(x = x2_5, y = const_16_promoted)[name = tensor("op_794")]; tensor var_796 = const()[name = tensor("op_796"), val = tensor(-1)]; tensor var_797_interleave_0 = const()[name = tensor("op_797_interleave_0"), val = tensor(false)]; tensor var_797 = concat(axis = var_796, interleave = var_797_interleave_0, values = (var_794, x1_5))[name = tensor("op_797")]; tensor var_798 = mul(x = var_797, y = sin_r_1)[name = tensor("op_798")]; tensor q_11 = add(x = var_773, y = var_798)[name = tensor("q_11")]; tensor var_801 = mul(x = k_7, y = cos_r_1)[name = tensor("op_801")]; tensor x1_7_begin_0 = const()[name = tensor("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = tensor("x1_7_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_7_end_mask_0 = const()[name = tensor("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_7)[name = tensor("x1_7")]; tensor x2_7_begin_0 = const()[name = tensor("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_7_end_0 = const()[name = tensor("x2_7_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_7_end_mask_0 = const()[name = tensor("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_7)[name = tensor("x2_7")]; tensor const_19_promoted = const()[name = tensor("const_19_promoted"), val = tensor(-0x1p+0)]; tensor var_822 = mul(x = x2_7, y = const_19_promoted)[name = tensor("op_822")]; tensor var_824 = const()[name = tensor("op_824"), val = tensor(-1)]; tensor var_825_interleave_0 = const()[name = tensor("op_825_interleave_0"), val = tensor(false)]; tensor var_825 = concat(axis = var_824, interleave = var_825_interleave_0, values = (var_822, x1_7))[name = tensor("op_825")]; tensor var_826 = mul(x = var_825, y = sin_r_1)[name = tensor("op_826")]; tensor k_11 = add(x = var_801, y = var_826)[name = tensor("k_11")]; tensor var_833 = const()[name = tensor("op_833"), val = tensor([1, 1024, 1, 1])]; tensor nk_3 = reshape(shape = var_833, x = k_11)[name = tensor("nk_3")]; tensor var_839 = const()[name = tensor("op_839"), val = tensor([1, 1024, 1, 1])]; tensor nv_3 = reshape(shape = var_839, x = var_714)[name = tensor("nv_3")]; tensor var_844 = mul(x = var_633, y = var_473)[name = tensor("op_844")]; tensor var_845 = mul(x = nk_3, y = update_mask)[name = tensor("op_845")]; tensor lkc_7 = add(x = var_844, y = var_845)[name = tensor("lkc_7")]; tensor var_851 = mul(x = var_653, y = var_473)[name = tensor("op_851")]; tensor var_852 = mul(x = nv_3, y = update_mask)[name = tensor("op_852")]; tensor lvc_7 = add(x = var_851, y = var_852)[name = tensor("lvc_7")]; tensor var_856_axes_0 = const()[name = tensor("op_856_axes_0"), val = tensor([2])]; tensor var_856 = squeeze(axes = var_856_axes_0, x = lkc_7)[name = tensor("op_856")]; tensor var_861 = const()[name = tensor("op_861"), val = tensor([1, 8, 128, 16])]; tensor kc_5 = reshape(shape = var_861, x = var_856)[name = tensor("kc_5")]; tensor var_864_axes_0 = const()[name = tensor("op_864_axes_0"), val = tensor([2])]; tensor var_864 = squeeze(axes = var_864_axes_0, x = lvc_7)[name = tensor("op_864")]; tensor var_869 = const()[name = tensor("op_869"), val = tensor([1, 8, 128, 16])]; tensor vc_5 = reshape(shape = var_869, x = var_864)[name = tensor("vc_5")]; tensor var_872_axes_0 = const()[name = tensor("op_872_axes_0"), val = tensor([2])]; tensor var_872 = expand_dims(axes = var_872_axes_0, x = kc_5)[name = tensor("op_872")]; tensor var_880_reps_0 = const()[name = tensor("op_880_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_880 = tile(reps = var_880_reps_0, x = var_872)[name = tensor("op_880")]; tensor var_885 = const()[name = tensor("op_885"), val = tensor([1, 16, 128, 16])]; tensor kc_7 = reshape(shape = var_885, x = var_880)[name = tensor("kc_7")]; tensor var_888_axes_0 = const()[name = tensor("op_888_axes_0"), val = tensor([2])]; tensor var_888 = expand_dims(axes = var_888_axes_0, x = vc_5)[name = tensor("op_888")]; tensor var_896_reps_0 = const()[name = tensor("op_896_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_896 = tile(reps = var_896_reps_0, x = var_888)[name = tensor("op_896")]; tensor var_901 = const()[name = tensor("op_901"), val = tensor([1, 16, 128, 16])]; tensor vc_7 = reshape(shape = var_901, x = var_896)[name = tensor("vc_7")]; tensor var_905_perm_0 = const()[name = tensor("op_905_perm_0"), val = tensor([0, 2, -3, -1])]; tensor var_906_transpose_x_0 = const()[name = tensor("op_906_transpose_x_0"), val = tensor(false)]; tensor var_906_transpose_y_0 = const()[name = tensor("op_906_transpose_y_0"), val = tensor(false)]; tensor var_905 = transpose(perm = var_905_perm_0, x = q_11)[name = tensor("transpose_7")]; tensor var_906 = matmul(transpose_x = var_906_transpose_x_0, transpose_y = var_906_transpose_y_0, x = var_905, y = kc_7)[name = tensor("op_906")]; tensor _inversed_aw_9_y_0 = const()[name = tensor("_inversed_aw_9_y_0"), val = tensor(0x1.6a09e6p-4)]; tensor _inversed_aw_9 = mul(x = var_906, y = _inversed_aw_9_y_0)[name = tensor("_inversed_aw_9")]; tensor aw_11 = add(x = _inversed_aw_9, y = var_542)[name = tensor("aw_11")]; tensor var_920 = const()[name = tensor("op_920"), val = tensor(-1)]; tensor aw_15 = softmax(axis = var_920, x = aw_11)[name = tensor("aw_15")]; tensor var_926_transpose_x_1 = const()[name = tensor("op_926_transpose_x_1"), val = tensor(false)]; tensor var_926_transpose_y_1 = const()[name = tensor("op_926_transpose_y_1"), val = tensor(true)]; tensor var_926 = matmul(transpose_x = var_926_transpose_x_1, transpose_y = var_926_transpose_y_1, x = aw_15, y = vc_7)[name = tensor("op_926")]; tensor var_929_perm_0 = const()[name = tensor("op_929_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_933 = const()[name = tensor("op_933"), val = tensor([1, 1, -1])]; tensor var_929 = transpose(perm = var_929_perm_0, x = var_926)[name = tensor("transpose_6")]; tensor input_13 = reshape(shape = var_933, x = var_929)[name = tensor("input_13")]; tensor var_937 = linear(bias = linear_1_bias_0, weight = layers_1_self_attn_o_proj_weight_palettized, x = input_13)[name = tensor("linear_10")]; tensor var_939_axes_0 = const()[name = tensor("op_939_axes_0"), val = tensor([0])]; tensor var_939 = squeeze(axes = var_939_axes_0, x = var_937)[name = tensor("op_939")]; tensor var_941_axes_0 = const()[name = tensor("op_941_axes_0"), val = tensor([0])]; tensor var_941 = squeeze(axes = var_941_axes_0, x = var_939)[name = tensor("op_941")]; tensor var_943_axes_0 = const()[name = tensor("op_943_axes_0"), val = tensor([-1])]; tensor var_943 = expand_dims(axes = var_943_axes_0, x = var_941)[name = tensor("op_943")]; tensor ao_3_axes_0 = const()[name = tensor("ao_3_axes_0"), val = tensor([-1])]; tensor ao_3 = expand_dims(axes = ao_3_axes_0, x = var_943)[name = tensor("ao_3")]; tensor hidden_5 = add(x = hidden_3, y = ao_3)[name = tensor("hidden_5")]; tensor var_949_axes_0 = const()[name = tensor("op_949_axes_0"), val = tensor([-1])]; tensor var_949 = squeeze(axes = var_949_axes_0, x = hidden_5)[name = tensor("op_949")]; tensor var_951_axes_0 = const()[name = tensor("op_951_axes_0"), val = tensor([-1])]; tensor var_951 = squeeze(axes = var_951_axes_0, x = var_949)[name = tensor("op_951")]; tensor hidden_states_29_axes_0 = const()[name = tensor("hidden_states_29_axes_0"), val = tensor([0])]; tensor hidden_states_29 = expand_dims(axes = hidden_states_29_axes_0, x = var_951)[name = tensor("hidden_states_29")]; tensor var_957_promoted = const()[name = tensor("op_957_promoted"), val = tensor(0x1p+1)]; tensor var_963 = pow(x = hidden_states_29, y = var_957_promoted)[name = tensor("op_963")]; tensor variance_15_axes_0 = const()[name = tensor("variance_15_axes_0"), val = tensor([-1])]; tensor variance_15_keep_dims_0 = const()[name = tensor("variance_15_keep_dims_0"), val = tensor(true)]; tensor variance_15 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = var_963)[name = tensor("variance_15")]; tensor const_20 = const()[name = tensor("const_20"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110198016)))]; tensor var_967 = mul(x = const_20, y = hidden_states_29)[name = tensor("op_967")]; tensor var_968 = const()[name = tensor("op_968"), val = tensor(0x1.0c6f7ap-20)]; tensor var_969 = add(x = variance_15, y = var_968)[name = tensor("op_969")]; tensor var_970_epsilon_0 = const()[name = tensor("op_970_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_970 = rsqrt(epsilon = var_970_epsilon_0, x = var_969)[name = tensor("op_970")]; tensor input_15 = mul(x = var_967, y = var_970)[name = tensor("input_15")]; tensor input_17 = linear(bias = linear_4_bias_0, weight = layers_1_mlp_gate_proj_weight_palettized, x = input_15)[name = tensor("linear_11")]; tensor var_978 = silu(x = input_17)[name = tensor("op_978")]; tensor var_980 = linear(bias = linear_4_bias_0, weight = layers_1_mlp_up_proj_weight_palettized, x = input_15)[name = tensor("linear_12")]; tensor input_19 = mul(x = var_978, y = var_980)[name = tensor("input_19")]; tensor var_983 = linear(bias = linear_1_bias_0, weight = layers_1_mlp_down_proj_weight_palettized, x = input_19)[name = tensor("linear_13")]; tensor var_985_axes_0 = const()[name = tensor("op_985_axes_0"), val = tensor([0])]; tensor var_985 = squeeze(axes = var_985_axes_0, x = var_983)[name = tensor("op_985")]; tensor var_987_axes_0 = const()[name = tensor("op_987_axes_0"), val = tensor([0])]; tensor var_987 = squeeze(axes = var_987_axes_0, x = var_985)[name = tensor("op_987")]; tensor var_989_axes_0 = const()[name = tensor("op_989_axes_0"), val = tensor([-1])]; tensor var_989 = expand_dims(axes = var_989_axes_0, x = var_987)[name = tensor("op_989")]; tensor h_3_axes_0 = const()[name = tensor("h_3_axes_0"), val = tensor([-1])]; tensor h_3 = expand_dims(axes = h_3_axes_0, x = var_989)[name = tensor("h_3")]; tensor hidden_7 = add(x = hidden_5, y = h_3)[name = tensor("hidden_7")]; tensor var_1003_begin_0 = const()[name = tensor("op_1003_begin_0"), val = tensor([0, 2048, 0, 0])]; tensor var_1003_end_0 = const()[name = tensor("op_1003_end_0"), val = tensor([1, 3072, 1, 16])]; tensor var_1003_end_mask_0 = const()[name = tensor("op_1003_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1003 = slice_by_index(begin = var_1003_begin_0, end = var_1003_end_0, end_mask = var_1003_end_mask_0, x = cast_1)[name = tensor("op_1003")]; tensor var_1023_begin_0 = const()[name = tensor("op_1023_begin_0"), val = tensor([0, 2048, 0, 0])]; tensor var_1023_end_0 = const()[name = tensor("op_1023_end_0"), val = tensor([1, 3072, 1, 16])]; tensor var_1023_end_mask_0 = const()[name = tensor("op_1023_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1023 = slice_by_index(begin = var_1023_begin_0, end = var_1023_end_0, end_mask = var_1023_end_mask_0, x = cast_4)[name = tensor("op_1023")]; tensor var_1035_axes_0 = const()[name = tensor("op_1035_axes_0"), val = tensor([-1])]; tensor var_1035 = squeeze(axes = var_1035_axes_0, x = hidden_7)[name = tensor("op_1035")]; tensor var_1037_axes_0 = const()[name = tensor("op_1037_axes_0"), val = tensor([-1])]; tensor var_1037 = squeeze(axes = var_1037_axes_0, x = var_1035)[name = tensor("op_1037")]; tensor hidden_states_33_axes_0 = const()[name = tensor("hidden_states_33_axes_0"), val = tensor([0])]; tensor hidden_states_33 = expand_dims(axes = hidden_states_33_axes_0, x = var_1037)[name = tensor("hidden_states_33")]; tensor var_1043_promoted = const()[name = tensor("op_1043_promoted"), val = tensor(0x1p+1)]; tensor var_1049 = pow(x = hidden_states_33, y = var_1043_promoted)[name = tensor("op_1049")]; tensor variance_17_axes_0 = const()[name = tensor("variance_17_axes_0"), val = tensor([-1])]; tensor variance_17_keep_dims_0 = const()[name = tensor("variance_17_keep_dims_0"), val = tensor(true)]; tensor variance_17 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = var_1049)[name = tensor("variance_17")]; tensor const_21 = const()[name = tensor("const_21"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110202176)))]; tensor var_1053 = mul(x = const_21, y = hidden_states_33)[name = tensor("op_1053")]; tensor var_1054 = const()[name = tensor("op_1054"), val = tensor(0x1.0c6f7ap-20)]; tensor var_1055 = add(x = variance_17, y = var_1054)[name = tensor("op_1055")]; tensor var_1056_epsilon_0 = const()[name = tensor("op_1056_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_1056 = rsqrt(epsilon = var_1056_epsilon_0, x = var_1055)[name = tensor("op_1056")]; tensor input_21 = mul(x = var_1053, y = var_1056)[name = tensor("input_21")]; tensor var_1060 = linear(bias = linear_0_bias_0, weight = layers_2_self_attn_q_proj_weight_palettized, x = input_21)[name = tensor("linear_14")]; tensor var_1065 = const()[name = tensor("op_1065"), val = tensor([1, 1, 16, 128])]; tensor var_1066 = reshape(shape = var_1065, x = var_1060)[name = tensor("op_1066")]; tensor var_1072 = linear(bias = linear_1_bias_0, weight = layers_2_self_attn_k_proj_weight_palettized, x = input_21)[name = tensor("linear_15")]; tensor var_1077 = const()[name = tensor("op_1077"), val = tensor([1, 1, 8, 128])]; tensor var_1078 = reshape(shape = var_1077, x = var_1072)[name = tensor("op_1078")]; tensor var_1084 = linear(bias = linear_1_bias_0, weight = layers_2_self_attn_v_proj_weight_palettized, x = input_21)[name = tensor("linear_16")]; tensor var_1097_promoted = const()[name = tensor("op_1097_promoted"), val = tensor(0x1p+1)]; tensor var_1103 = pow(x = var_1066, y = var_1097_promoted)[name = tensor("op_1103")]; tensor variance_19_keep_dims_0 = const()[name = tensor("variance_19_keep_dims_0"), val = tensor(true)]; tensor const_60 = const()[name = tensor("const_60"), val = tensor([3])]; tensor variance_19 = reduce_mean(axes = const_60, keep_dims = variance_19_keep_dims_0, x = var_1103)[name = tensor("variance_19")]; tensor const_61 = const()[name = tensor("const_61"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110206336)))]; tensor var_1107 = mul(x = const_61, y = var_1066)[name = tensor("op_1107")]; tensor var_1108 = const()[name = tensor("op_1108"), val = tensor(0x1.0c6f7ap-20)]; tensor var_1109 = add(x = variance_19, y = var_1108)[name = tensor("op_1109")]; tensor var_1110_epsilon_0 = const()[name = tensor("op_1110_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_1110 = rsqrt(epsilon = var_1110_epsilon_0, x = var_1109)[name = tensor("op_1110")]; tensor q_13 = mul(x = var_1107, y = var_1110)[name = tensor("q_13")]; tensor var_1115_promoted = const()[name = tensor("op_1115_promoted"), val = tensor(0x1p+1)]; tensor var_1121 = pow(x = var_1078, y = var_1115_promoted)[name = tensor("op_1121")]; tensor variance_21_keep_dims_0 = const()[name = tensor("variance_21_keep_dims_0"), val = tensor(true)]; tensor const_62 = const()[name = tensor("const_62"), val = tensor([3])]; tensor variance_21 = reduce_mean(axes = const_62, keep_dims = variance_21_keep_dims_0, x = var_1121)[name = tensor("variance_21")]; tensor const_63 = const()[name = tensor("const_63"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110206912)))]; tensor var_1125 = mul(x = const_63, y = var_1078)[name = tensor("op_1125")]; tensor var_1126 = const()[name = tensor("op_1126"), val = tensor(0x1.0c6f7ap-20)]; tensor var_1127 = add(x = variance_21, y = var_1126)[name = tensor("op_1127")]; tensor var_1128_epsilon_0 = const()[name = tensor("op_1128_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_1128 = rsqrt(epsilon = var_1128_epsilon_0, x = var_1127)[name = tensor("op_1128")]; tensor k_13 = mul(x = var_1125, y = var_1128)[name = tensor("k_13")]; tensor var_1143 = mul(x = q_13, y = cos_r_1)[name = tensor("op_1143")]; tensor x1_9_begin_0 = const()[name = tensor("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = tensor("x1_9_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_9_end_mask_0 = const()[name = tensor("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_13)[name = tensor("x1_9")]; tensor x2_9_begin_0 = const()[name = tensor("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_9_end_0 = const()[name = tensor("x2_9_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_9_end_mask_0 = const()[name = tensor("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_13)[name = tensor("x2_9")]; tensor const_26_promoted = const()[name = tensor("const_26_promoted"), val = tensor(-0x1p+0)]; tensor var_1164 = mul(x = x2_9, y = const_26_promoted)[name = tensor("op_1164")]; tensor var_1166 = const()[name = tensor("op_1166"), val = tensor(-1)]; tensor var_1167_interleave_0 = const()[name = tensor("op_1167_interleave_0"), val = tensor(false)]; tensor var_1167 = concat(axis = var_1166, interleave = var_1167_interleave_0, values = (var_1164, x1_9))[name = tensor("op_1167")]; tensor var_1168 = mul(x = var_1167, y = sin_r_1)[name = tensor("op_1168")]; tensor q_17 = add(x = var_1143, y = var_1168)[name = tensor("q_17")]; tensor var_1171 = mul(x = k_13, y = cos_r_1)[name = tensor("op_1171")]; tensor x1_11_begin_0 = const()[name = tensor("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = tensor("x1_11_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_11_end_mask_0 = const()[name = tensor("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_13)[name = tensor("x1_11")]; tensor x2_11_begin_0 = const()[name = tensor("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_11_end_0 = const()[name = tensor("x2_11_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_11_end_mask_0 = const()[name = tensor("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_13)[name = tensor("x2_11")]; tensor const_29_promoted = const()[name = tensor("const_29_promoted"), val = tensor(-0x1p+0)]; tensor var_1192 = mul(x = x2_11, y = const_29_promoted)[name = tensor("op_1192")]; tensor var_1194 = const()[name = tensor("op_1194"), val = tensor(-1)]; tensor var_1195_interleave_0 = const()[name = tensor("op_1195_interleave_0"), val = tensor(false)]; tensor var_1195 = concat(axis = var_1194, interleave = var_1195_interleave_0, values = (var_1192, x1_11))[name = tensor("op_1195")]; tensor var_1196 = mul(x = var_1195, y = sin_r_1)[name = tensor("op_1196")]; tensor k_17 = add(x = var_1171, y = var_1196)[name = tensor("k_17")]; tensor var_1203 = const()[name = tensor("op_1203"), val = tensor([1, 1024, 1, 1])]; tensor nk_5 = reshape(shape = var_1203, x = k_17)[name = tensor("nk_5")]; tensor var_1209 = const()[name = tensor("op_1209"), val = tensor([1, 1024, 1, 1])]; tensor nv_5 = reshape(shape = var_1209, x = var_1084)[name = tensor("nv_5")]; tensor var_1214 = mul(x = var_1003, y = var_473)[name = tensor("op_1214")]; tensor var_1215 = mul(x = nk_5, y = update_mask)[name = tensor("op_1215")]; tensor lkc_11 = add(x = var_1214, y = var_1215)[name = tensor("lkc_11")]; tensor var_1221 = mul(x = var_1023, y = var_473)[name = tensor("op_1221")]; tensor var_1222 = mul(x = nv_5, y = update_mask)[name = tensor("op_1222")]; tensor lvc_11 = add(x = var_1221, y = var_1222)[name = tensor("lvc_11")]; tensor var_1226_axes_0 = const()[name = tensor("op_1226_axes_0"), val = tensor([2])]; tensor var_1226 = squeeze(axes = var_1226_axes_0, x = lkc_11)[name = tensor("op_1226")]; tensor var_1231 = const()[name = tensor("op_1231"), val = tensor([1, 8, 128, 16])]; tensor kc_9 = reshape(shape = var_1231, x = var_1226)[name = tensor("kc_9")]; tensor var_1234_axes_0 = const()[name = tensor("op_1234_axes_0"), val = tensor([2])]; tensor var_1234 = squeeze(axes = var_1234_axes_0, x = lvc_11)[name = tensor("op_1234")]; tensor var_1239 = const()[name = tensor("op_1239"), val = tensor([1, 8, 128, 16])]; tensor vc_9 = reshape(shape = var_1239, x = var_1234)[name = tensor("vc_9")]; tensor var_1242_axes_0 = const()[name = tensor("op_1242_axes_0"), val = tensor([2])]; tensor var_1242 = expand_dims(axes = var_1242_axes_0, x = kc_9)[name = tensor("op_1242")]; tensor var_1250_reps_0 = const()[name = tensor("op_1250_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1250 = tile(reps = var_1250_reps_0, x = var_1242)[name = tensor("op_1250")]; tensor var_1255 = const()[name = tensor("op_1255"), val = tensor([1, 16, 128, 16])]; tensor kc_11 = reshape(shape = var_1255, x = var_1250)[name = tensor("kc_11")]; tensor var_1258_axes_0 = const()[name = tensor("op_1258_axes_0"), val = tensor([2])]; tensor var_1258 = expand_dims(axes = var_1258_axes_0, x = vc_9)[name = tensor("op_1258")]; tensor var_1266_reps_0 = const()[name = tensor("op_1266_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1266 = tile(reps = var_1266_reps_0, x = var_1258)[name = tensor("op_1266")]; tensor var_1271 = const()[name = tensor("op_1271"), val = tensor([1, 16, 128, 16])]; tensor vc_11 = reshape(shape = var_1271, x = var_1266)[name = tensor("vc_11")]; tensor var_1275_perm_0 = const()[name = tensor("op_1275_perm_0"), val = tensor([0, 2, -3, -1])]; tensor var_1276_transpose_x_0 = const()[name = tensor("op_1276_transpose_x_0"), val = tensor(false)]; tensor var_1276_transpose_y_0 = const()[name = tensor("op_1276_transpose_y_0"), val = tensor(false)]; tensor var_1275 = transpose(perm = var_1275_perm_0, x = q_17)[name = tensor("transpose_5")]; tensor var_1276 = matmul(transpose_x = var_1276_transpose_x_0, transpose_y = var_1276_transpose_y_0, x = var_1275, y = kc_11)[name = tensor("op_1276")]; tensor _inversed_aw_17_y_0 = const()[name = tensor("_inversed_aw_17_y_0"), val = tensor(0x1.6a09e6p-4)]; tensor _inversed_aw_17 = mul(x = var_1276, y = _inversed_aw_17_y_0)[name = tensor("_inversed_aw_17")]; tensor aw_19 = add(x = _inversed_aw_17, y = var_542)[name = tensor("aw_19")]; tensor var_1290 = const()[name = tensor("op_1290"), val = tensor(-1)]; tensor aw_23 = softmax(axis = var_1290, x = aw_19)[name = tensor("aw_23")]; tensor var_1296_transpose_x_1 = const()[name = tensor("op_1296_transpose_x_1"), val = tensor(false)]; tensor var_1296_transpose_y_1 = const()[name = tensor("op_1296_transpose_y_1"), val = tensor(true)]; tensor var_1296 = matmul(transpose_x = var_1296_transpose_x_1, transpose_y = var_1296_transpose_y_1, x = aw_23, y = vc_11)[name = tensor("op_1296")]; tensor var_1299_perm_0 = const()[name = tensor("op_1299_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1303 = const()[name = tensor("op_1303"), val = tensor([1, 1, -1])]; tensor var_1299 = transpose(perm = var_1299_perm_0, x = var_1296)[name = tensor("transpose_4")]; tensor input_23 = reshape(shape = var_1303, x = var_1299)[name = tensor("input_23")]; tensor var_1307 = linear(bias = linear_1_bias_0, weight = layers_2_self_attn_o_proj_weight_palettized, x = input_23)[name = tensor("linear_17")]; tensor var_1309_axes_0 = const()[name = tensor("op_1309_axes_0"), val = tensor([0])]; tensor var_1309 = squeeze(axes = var_1309_axes_0, x = var_1307)[name = tensor("op_1309")]; tensor var_1311_axes_0 = const()[name = tensor("op_1311_axes_0"), val = tensor([0])]; tensor var_1311 = squeeze(axes = var_1311_axes_0, x = var_1309)[name = tensor("op_1311")]; tensor var_1313_axes_0 = const()[name = tensor("op_1313_axes_0"), val = tensor([-1])]; tensor var_1313 = expand_dims(axes = var_1313_axes_0, x = var_1311)[name = tensor("op_1313")]; tensor ao_5_axes_0 = const()[name = tensor("ao_5_axes_0"), val = tensor([-1])]; tensor ao_5 = expand_dims(axes = ao_5_axes_0, x = var_1313)[name = tensor("ao_5")]; tensor hidden_9 = add(x = hidden_7, y = ao_5)[name = tensor("hidden_9")]; tensor var_1319_axes_0 = const()[name = tensor("op_1319_axes_0"), val = tensor([-1])]; tensor var_1319 = squeeze(axes = var_1319_axes_0, x = hidden_9)[name = tensor("op_1319")]; tensor var_1321_axes_0 = const()[name = tensor("op_1321_axes_0"), val = tensor([-1])]; tensor var_1321 = squeeze(axes = var_1321_axes_0, x = var_1319)[name = tensor("op_1321")]; tensor hidden_states_45_axes_0 = const()[name = tensor("hidden_states_45_axes_0"), val = tensor([0])]; tensor hidden_states_45 = expand_dims(axes = hidden_states_45_axes_0, x = var_1321)[name = tensor("hidden_states_45")]; tensor var_1327_promoted = const()[name = tensor("op_1327_promoted"), val = tensor(0x1p+1)]; tensor var_1333 = pow(x = hidden_states_45, y = var_1327_promoted)[name = tensor("op_1333")]; tensor variance_23_axes_0 = const()[name = tensor("variance_23_axes_0"), val = tensor([-1])]; tensor variance_23_keep_dims_0 = const()[name = tensor("variance_23_keep_dims_0"), val = tensor(true)]; tensor variance_23 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = var_1333)[name = tensor("variance_23")]; tensor const_30 = const()[name = tensor("const_30"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110207488)))]; tensor var_1337 = mul(x = const_30, y = hidden_states_45)[name = tensor("op_1337")]; tensor var_1338 = const()[name = tensor("op_1338"), val = tensor(0x1.0c6f7ap-20)]; tensor var_1339 = add(x = variance_23, y = var_1338)[name = tensor("op_1339")]; tensor var_1340_epsilon_0 = const()[name = tensor("op_1340_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_1340 = rsqrt(epsilon = var_1340_epsilon_0, x = var_1339)[name = tensor("op_1340")]; tensor input_25 = mul(x = var_1337, y = var_1340)[name = tensor("input_25")]; tensor input_27 = linear(bias = linear_4_bias_0, weight = layers_2_mlp_gate_proj_weight_palettized, x = input_25)[name = tensor("linear_18")]; tensor var_1348 = silu(x = input_27)[name = tensor("op_1348")]; tensor var_1350 = linear(bias = linear_4_bias_0, weight = layers_2_mlp_up_proj_weight_palettized, x = input_25)[name = tensor("linear_19")]; tensor input_29 = mul(x = var_1348, y = var_1350)[name = tensor("input_29")]; tensor var_1353 = linear(bias = linear_1_bias_0, weight = layers_2_mlp_down_proj_weight_palettized, x = input_29)[name = tensor("linear_20")]; tensor var_1355_axes_0 = const()[name = tensor("op_1355_axes_0"), val = tensor([0])]; tensor var_1355 = squeeze(axes = var_1355_axes_0, x = var_1353)[name = tensor("op_1355")]; tensor var_1357_axes_0 = const()[name = tensor("op_1357_axes_0"), val = tensor([0])]; tensor var_1357 = squeeze(axes = var_1357_axes_0, x = var_1355)[name = tensor("op_1357")]; tensor var_1359_axes_0 = const()[name = tensor("op_1359_axes_0"), val = tensor([-1])]; tensor var_1359 = expand_dims(axes = var_1359_axes_0, x = var_1357)[name = tensor("op_1359")]; tensor h_5_axes_0 = const()[name = tensor("h_5_axes_0"), val = tensor([-1])]; tensor h_5 = expand_dims(axes = h_5_axes_0, x = var_1359)[name = tensor("h_5")]; tensor hidden_11 = add(x = hidden_9, y = h_5)[name = tensor("hidden_11")]; tensor var_1373_begin_0 = const()[name = tensor("op_1373_begin_0"), val = tensor([0, 3072, 0, 0])]; tensor var_1373_end_0 = const()[name = tensor("op_1373_end_0"), val = tensor([1, 4096, 1, 16])]; tensor var_1373_end_mask_0 = const()[name = tensor("op_1373_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1373 = slice_by_index(begin = var_1373_begin_0, end = var_1373_end_0, end_mask = var_1373_end_mask_0, x = cast_1)[name = tensor("op_1373")]; tensor var_1393_begin_0 = const()[name = tensor("op_1393_begin_0"), val = tensor([0, 3072, 0, 0])]; tensor var_1393_end_0 = const()[name = tensor("op_1393_end_0"), val = tensor([1, 4096, 1, 16])]; tensor var_1393_end_mask_0 = const()[name = tensor("op_1393_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1393 = slice_by_index(begin = var_1393_begin_0, end = var_1393_end_0, end_mask = var_1393_end_mask_0, x = cast_4)[name = tensor("op_1393")]; tensor var_1405_axes_0 = const()[name = tensor("op_1405_axes_0"), val = tensor([-1])]; tensor var_1405 = squeeze(axes = var_1405_axes_0, x = hidden_11)[name = tensor("op_1405")]; tensor var_1407_axes_0 = const()[name = tensor("op_1407_axes_0"), val = tensor([-1])]; tensor var_1407 = squeeze(axes = var_1407_axes_0, x = var_1405)[name = tensor("op_1407")]; tensor hidden_states_49_axes_0 = const()[name = tensor("hidden_states_49_axes_0"), val = tensor([0])]; tensor hidden_states_49 = expand_dims(axes = hidden_states_49_axes_0, x = var_1407)[name = tensor("hidden_states_49")]; tensor var_1413_promoted = const()[name = tensor("op_1413_promoted"), val = tensor(0x1p+1)]; tensor var_1419 = pow(x = hidden_states_49, y = var_1413_promoted)[name = tensor("op_1419")]; tensor variance_25_axes_0 = const()[name = tensor("variance_25_axes_0"), val = tensor([-1])]; tensor variance_25_keep_dims_0 = const()[name = tensor("variance_25_keep_dims_0"), val = tensor(true)]; tensor variance_25 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = var_1419)[name = tensor("variance_25")]; tensor const_31 = const()[name = tensor("const_31"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110211648)))]; tensor var_1423 = mul(x = const_31, y = hidden_states_49)[name = tensor("op_1423")]; tensor var_1424 = const()[name = tensor("op_1424"), val = tensor(0x1.0c6f7ap-20)]; tensor var_1425 = add(x = variance_25, y = var_1424)[name = tensor("op_1425")]; tensor var_1426_epsilon_0 = const()[name = tensor("op_1426_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_1426 = rsqrt(epsilon = var_1426_epsilon_0, x = var_1425)[name = tensor("op_1426")]; tensor input_31 = mul(x = var_1423, y = var_1426)[name = tensor("input_31")]; tensor var_1430 = linear(bias = linear_0_bias_0, weight = layers_3_self_attn_q_proj_weight_palettized, x = input_31)[name = tensor("linear_21")]; tensor var_1435 = const()[name = tensor("op_1435"), val = tensor([1, 1, 16, 128])]; tensor var_1436 = reshape(shape = var_1435, x = var_1430)[name = tensor("op_1436")]; tensor var_1442 = linear(bias = linear_1_bias_0, weight = layers_3_self_attn_k_proj_weight_palettized, x = input_31)[name = tensor("linear_22")]; tensor var_1447 = const()[name = tensor("op_1447"), val = tensor([1, 1, 8, 128])]; tensor var_1448 = reshape(shape = var_1447, x = var_1442)[name = tensor("op_1448")]; tensor var_1454 = linear(bias = linear_1_bias_0, weight = layers_3_self_attn_v_proj_weight_palettized, x = input_31)[name = tensor("linear_23")]; tensor var_1467_promoted = const()[name = tensor("op_1467_promoted"), val = tensor(0x1p+1)]; tensor var_1473 = pow(x = var_1436, y = var_1467_promoted)[name = tensor("op_1473")]; tensor variance_27_keep_dims_0 = const()[name = tensor("variance_27_keep_dims_0"), val = tensor(true)]; tensor const_64 = const()[name = tensor("const_64"), val = tensor([3])]; tensor variance_27 = reduce_mean(axes = const_64, keep_dims = variance_27_keep_dims_0, x = var_1473)[name = tensor("variance_27")]; tensor const_65 = const()[name = tensor("const_65"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110215808)))]; tensor var_1477 = mul(x = const_65, y = var_1436)[name = tensor("op_1477")]; tensor var_1478 = const()[name = tensor("op_1478"), val = tensor(0x1.0c6f7ap-20)]; tensor var_1479 = add(x = variance_27, y = var_1478)[name = tensor("op_1479")]; tensor var_1480_epsilon_0 = const()[name = tensor("op_1480_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_1480 = rsqrt(epsilon = var_1480_epsilon_0, x = var_1479)[name = tensor("op_1480")]; tensor q_19 = mul(x = var_1477, y = var_1480)[name = tensor("q_19")]; tensor var_1485_promoted = const()[name = tensor("op_1485_promoted"), val = tensor(0x1p+1)]; tensor var_1491 = pow(x = var_1448, y = var_1485_promoted)[name = tensor("op_1491")]; tensor variance_29_keep_dims_0 = const()[name = tensor("variance_29_keep_dims_0"), val = tensor(true)]; tensor const_66 = const()[name = tensor("const_66"), val = tensor([3])]; tensor variance_29 = reduce_mean(axes = const_66, keep_dims = variance_29_keep_dims_0, x = var_1491)[name = tensor("variance_29")]; tensor const_67 = const()[name = tensor("const_67"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110216384)))]; tensor var_1495 = mul(x = const_67, y = var_1448)[name = tensor("op_1495")]; tensor var_1496 = const()[name = tensor("op_1496"), val = tensor(0x1.0c6f7ap-20)]; tensor var_1497 = add(x = variance_29, y = var_1496)[name = tensor("op_1497")]; tensor var_1498_epsilon_0 = const()[name = tensor("op_1498_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_1498 = rsqrt(epsilon = var_1498_epsilon_0, x = var_1497)[name = tensor("op_1498")]; tensor k_19 = mul(x = var_1495, y = var_1498)[name = tensor("k_19")]; tensor var_1513 = mul(x = q_19, y = cos_r_1)[name = tensor("op_1513")]; tensor x1_13_begin_0 = const()[name = tensor("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = tensor("x1_13_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_13_end_mask_0 = const()[name = tensor("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_19)[name = tensor("x1_13")]; tensor x2_13_begin_0 = const()[name = tensor("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_13_end_0 = const()[name = tensor("x2_13_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_13_end_mask_0 = const()[name = tensor("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_19)[name = tensor("x2_13")]; tensor const_36_promoted = const()[name = tensor("const_36_promoted"), val = tensor(-0x1p+0)]; tensor var_1534 = mul(x = x2_13, y = const_36_promoted)[name = tensor("op_1534")]; tensor var_1536 = const()[name = tensor("op_1536"), val = tensor(-1)]; tensor var_1537_interleave_0 = const()[name = tensor("op_1537_interleave_0"), val = tensor(false)]; tensor var_1537 = concat(axis = var_1536, interleave = var_1537_interleave_0, values = (var_1534, x1_13))[name = tensor("op_1537")]; tensor var_1538 = mul(x = var_1537, y = sin_r_1)[name = tensor("op_1538")]; tensor q_23 = add(x = var_1513, y = var_1538)[name = tensor("q_23")]; tensor var_1541 = mul(x = k_19, y = cos_r_1)[name = tensor("op_1541")]; tensor x1_15_begin_0 = const()[name = tensor("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = tensor("x1_15_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_15_end_mask_0 = const()[name = tensor("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_19)[name = tensor("x1_15")]; tensor x2_15_begin_0 = const()[name = tensor("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_15_end_0 = const()[name = tensor("x2_15_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_15_end_mask_0 = const()[name = tensor("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_19)[name = tensor("x2_15")]; tensor const_39_promoted = const()[name = tensor("const_39_promoted"), val = tensor(-0x1p+0)]; tensor var_1562 = mul(x = x2_15, y = const_39_promoted)[name = tensor("op_1562")]; tensor var_1564 = const()[name = tensor("op_1564"), val = tensor(-1)]; tensor var_1565_interleave_0 = const()[name = tensor("op_1565_interleave_0"), val = tensor(false)]; tensor var_1565 = concat(axis = var_1564, interleave = var_1565_interleave_0, values = (var_1562, x1_15))[name = tensor("op_1565")]; tensor var_1566 = mul(x = var_1565, y = sin_r_1)[name = tensor("op_1566")]; tensor k_23 = add(x = var_1541, y = var_1566)[name = tensor("k_23")]; tensor var_1573 = const()[name = tensor("op_1573"), val = tensor([1, 1024, 1, 1])]; tensor nk_7 = reshape(shape = var_1573, x = k_23)[name = tensor("nk_7")]; tensor var_1579 = const()[name = tensor("op_1579"), val = tensor([1, 1024, 1, 1])]; tensor nv_7 = reshape(shape = var_1579, x = var_1454)[name = tensor("nv_7")]; tensor var_1584 = mul(x = var_1373, y = var_473)[name = tensor("op_1584")]; tensor var_1585 = mul(x = nk_7, y = update_mask)[name = tensor("op_1585")]; tensor lkc_15 = add(x = var_1584, y = var_1585)[name = tensor("lkc_15")]; tensor var_1591 = mul(x = var_1393, y = var_473)[name = tensor("op_1591")]; tensor var_1592 = mul(x = nv_7, y = update_mask)[name = tensor("op_1592")]; tensor lvc_15 = add(x = var_1591, y = var_1592)[name = tensor("lvc_15")]; tensor var_1596_axes_0 = const()[name = tensor("op_1596_axes_0"), val = tensor([2])]; tensor var_1596 = squeeze(axes = var_1596_axes_0, x = lkc_15)[name = tensor("op_1596")]; tensor var_1601 = const()[name = tensor("op_1601"), val = tensor([1, 8, 128, 16])]; tensor kc_13 = reshape(shape = var_1601, x = var_1596)[name = tensor("kc_13")]; tensor var_1604_axes_0 = const()[name = tensor("op_1604_axes_0"), val = tensor([2])]; tensor var_1604 = squeeze(axes = var_1604_axes_0, x = lvc_15)[name = tensor("op_1604")]; tensor var_1609 = const()[name = tensor("op_1609"), val = tensor([1, 8, 128, 16])]; tensor vc_13 = reshape(shape = var_1609, x = var_1604)[name = tensor("vc_13")]; tensor var_1612_axes_0 = const()[name = tensor("op_1612_axes_0"), val = tensor([2])]; tensor var_1612 = expand_dims(axes = var_1612_axes_0, x = kc_13)[name = tensor("op_1612")]; tensor var_1620_reps_0 = const()[name = tensor("op_1620_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1620 = tile(reps = var_1620_reps_0, x = var_1612)[name = tensor("op_1620")]; tensor var_1625 = const()[name = tensor("op_1625"), val = tensor([1, 16, 128, 16])]; tensor kc_15 = reshape(shape = var_1625, x = var_1620)[name = tensor("kc_15")]; tensor var_1628_axes_0 = const()[name = tensor("op_1628_axes_0"), val = tensor([2])]; tensor var_1628 = expand_dims(axes = var_1628_axes_0, x = vc_13)[name = tensor("op_1628")]; tensor var_1636_reps_0 = const()[name = tensor("op_1636_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1636 = tile(reps = var_1636_reps_0, x = var_1628)[name = tensor("op_1636")]; tensor var_1641 = const()[name = tensor("op_1641"), val = tensor([1, 16, 128, 16])]; tensor vc_15 = reshape(shape = var_1641, x = var_1636)[name = tensor("vc_15")]; tensor var_1645_perm_0 = const()[name = tensor("op_1645_perm_0"), val = tensor([0, 2, -3, -1])]; tensor var_1646_transpose_x_0 = const()[name = tensor("op_1646_transpose_x_0"), val = tensor(false)]; tensor var_1646_transpose_y_0 = const()[name = tensor("op_1646_transpose_y_0"), val = tensor(false)]; tensor var_1645 = transpose(perm = var_1645_perm_0, x = q_23)[name = tensor("transpose_3")]; tensor var_1646 = matmul(transpose_x = var_1646_transpose_x_0, transpose_y = var_1646_transpose_y_0, x = var_1645, y = kc_15)[name = tensor("op_1646")]; tensor _inversed_aw_25_y_0 = const()[name = tensor("_inversed_aw_25_y_0"), val = tensor(0x1.6a09e6p-4)]; tensor _inversed_aw_25 = mul(x = var_1646, y = _inversed_aw_25_y_0)[name = tensor("_inversed_aw_25")]; tensor aw_27 = add(x = _inversed_aw_25, y = var_542)[name = tensor("aw_27")]; tensor var_1660 = const()[name = tensor("op_1660"), val = tensor(-1)]; tensor aw_31 = softmax(axis = var_1660, x = aw_27)[name = tensor("aw_31")]; tensor var_1666_transpose_x_1 = const()[name = tensor("op_1666_transpose_x_1"), val = tensor(false)]; tensor var_1666_transpose_y_1 = const()[name = tensor("op_1666_transpose_y_1"), val = tensor(true)]; tensor var_1666 = matmul(transpose_x = var_1666_transpose_x_1, transpose_y = var_1666_transpose_y_1, x = aw_31, y = vc_15)[name = tensor("op_1666")]; tensor var_1669_perm_0 = const()[name = tensor("op_1669_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1673 = const()[name = tensor("op_1673"), val = tensor([1, 1, -1])]; tensor var_1669 = transpose(perm = var_1669_perm_0, x = var_1666)[name = tensor("transpose_2")]; tensor input_33 = reshape(shape = var_1673, x = var_1669)[name = tensor("input_33")]; tensor var_1677 = linear(bias = linear_1_bias_0, weight = layers_3_self_attn_o_proj_weight_palettized, x = input_33)[name = tensor("linear_24")]; tensor var_1679_axes_0 = const()[name = tensor("op_1679_axes_0"), val = tensor([0])]; tensor var_1679 = squeeze(axes = var_1679_axes_0, x = var_1677)[name = tensor("op_1679")]; tensor var_1681_axes_0 = const()[name = tensor("op_1681_axes_0"), val = tensor([0])]; tensor var_1681 = squeeze(axes = var_1681_axes_0, x = var_1679)[name = tensor("op_1681")]; tensor var_1683_axes_0 = const()[name = tensor("op_1683_axes_0"), val = tensor([-1])]; tensor var_1683 = expand_dims(axes = var_1683_axes_0, x = var_1681)[name = tensor("op_1683")]; tensor ao_7_axes_0 = const()[name = tensor("ao_7_axes_0"), val = tensor([-1])]; tensor ao_7 = expand_dims(axes = ao_7_axes_0, x = var_1683)[name = tensor("ao_7")]; tensor hidden_13 = add(x = hidden_11, y = ao_7)[name = tensor("hidden_13")]; tensor var_1689_axes_0 = const()[name = tensor("op_1689_axes_0"), val = tensor([-1])]; tensor var_1689 = squeeze(axes = var_1689_axes_0, x = hidden_13)[name = tensor("op_1689")]; tensor var_1691_axes_0 = const()[name = tensor("op_1691_axes_0"), val = tensor([-1])]; tensor var_1691 = squeeze(axes = var_1691_axes_0, x = var_1689)[name = tensor("op_1691")]; tensor hidden_states_61_axes_0 = const()[name = tensor("hidden_states_61_axes_0"), val = tensor([0])]; tensor hidden_states_61 = expand_dims(axes = hidden_states_61_axes_0, x = var_1691)[name = tensor("hidden_states_61")]; tensor var_1697_promoted = const()[name = tensor("op_1697_promoted"), val = tensor(0x1p+1)]; tensor var_1703 = pow(x = hidden_states_61, y = var_1697_promoted)[name = tensor("op_1703")]; tensor variance_31_axes_0 = const()[name = tensor("variance_31_axes_0"), val = tensor([-1])]; tensor variance_31_keep_dims_0 = const()[name = tensor("variance_31_keep_dims_0"), val = tensor(true)]; tensor variance_31 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = var_1703)[name = tensor("variance_31")]; tensor const_40 = const()[name = tensor("const_40"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110216960)))]; tensor var_1707 = mul(x = const_40, y = hidden_states_61)[name = tensor("op_1707")]; tensor var_1708 = const()[name = tensor("op_1708"), val = tensor(0x1.0c6f7ap-20)]; tensor var_1709 = add(x = variance_31, y = var_1708)[name = tensor("op_1709")]; tensor var_1710_epsilon_0 = const()[name = tensor("op_1710_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_1710 = rsqrt(epsilon = var_1710_epsilon_0, x = var_1709)[name = tensor("op_1710")]; tensor input_35 = mul(x = var_1707, y = var_1710)[name = tensor("input_35")]; tensor input_37 = linear(bias = linear_4_bias_0, weight = layers_3_mlp_gate_proj_weight_palettized, x = input_35)[name = tensor("linear_25")]; tensor var_1718 = silu(x = input_37)[name = tensor("op_1718")]; tensor var_1720 = linear(bias = linear_4_bias_0, weight = layers_3_mlp_up_proj_weight_palettized, x = input_35)[name = tensor("linear_26")]; tensor input_39 = mul(x = var_1718, y = var_1720)[name = tensor("input_39")]; tensor var_1723 = linear(bias = linear_1_bias_0, weight = layers_3_mlp_down_proj_weight_palettized, x = input_39)[name = tensor("linear_27")]; tensor var_1725_axes_0 = const()[name = tensor("op_1725_axes_0"), val = tensor([0])]; tensor var_1725 = squeeze(axes = var_1725_axes_0, x = var_1723)[name = tensor("op_1725")]; tensor var_1727_axes_0 = const()[name = tensor("op_1727_axes_0"), val = tensor([0])]; tensor var_1727 = squeeze(axes = var_1727_axes_0, x = var_1725)[name = tensor("op_1727")]; tensor var_1729_axes_0 = const()[name = tensor("op_1729_axes_0"), val = tensor([-1])]; tensor var_1729 = expand_dims(axes = var_1729_axes_0, x = var_1727)[name = tensor("op_1729")]; tensor h_7_axes_0 = const()[name = tensor("h_7_axes_0"), val = tensor([-1])]; tensor h_7 = expand_dims(axes = h_7_axes_0, x = var_1729)[name = tensor("h_7")]; tensor hidden_15 = add(x = hidden_13, y = h_7)[name = tensor("hidden_15")]; tensor var_1743_begin_0 = const()[name = tensor("op_1743_begin_0"), val = tensor([0, 4096, 0, 0])]; tensor var_1743_end_0 = const()[name = tensor("op_1743_end_0"), val = tensor([1, 1, 1, 16])]; tensor var_1743_end_mask_0 = const()[name = tensor("op_1743_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1743 = slice_by_index(begin = var_1743_begin_0, end = var_1743_end_0, end_mask = var_1743_end_mask_0, x = cast_1)[name = tensor("op_1743")]; tensor var_1763_begin_0 = const()[name = tensor("op_1763_begin_0"), val = tensor([0, 4096, 0, 0])]; tensor var_1763_end_0 = const()[name = tensor("op_1763_end_0"), val = tensor([1, 1, 1, 16])]; tensor var_1763_end_mask_0 = const()[name = tensor("op_1763_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1763 = slice_by_index(begin = var_1763_begin_0, end = var_1763_end_0, end_mask = var_1763_end_mask_0, x = cast_4)[name = tensor("op_1763")]; tensor var_1775_axes_0 = const()[name = tensor("op_1775_axes_0"), val = tensor([-1])]; tensor var_1775 = squeeze(axes = var_1775_axes_0, x = hidden_15)[name = tensor("op_1775")]; tensor var_1777_axes_0 = const()[name = tensor("op_1777_axes_0"), val = tensor([-1])]; tensor var_1777 = squeeze(axes = var_1777_axes_0, x = var_1775)[name = tensor("op_1777")]; tensor hidden_states_65_axes_0 = const()[name = tensor("hidden_states_65_axes_0"), val = tensor([0])]; tensor hidden_states_65 = expand_dims(axes = hidden_states_65_axes_0, x = var_1777)[name = tensor("hidden_states_65")]; tensor var_1783_promoted = const()[name = tensor("op_1783_promoted"), val = tensor(0x1p+1)]; tensor var_1789 = pow(x = hidden_states_65, y = var_1783_promoted)[name = tensor("op_1789")]; tensor variance_33_axes_0 = const()[name = tensor("variance_33_axes_0"), val = tensor([-1])]; tensor variance_33_keep_dims_0 = const()[name = tensor("variance_33_keep_dims_0"), val = tensor(true)]; tensor variance_33 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = var_1789)[name = tensor("variance_33")]; tensor const_41 = const()[name = tensor("const_41"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110221120)))]; tensor var_1793 = mul(x = const_41, y = hidden_states_65)[name = tensor("op_1793")]; tensor var_1794 = const()[name = tensor("op_1794"), val = tensor(0x1.0c6f7ap-20)]; tensor var_1795 = add(x = variance_33, y = var_1794)[name = tensor("op_1795")]; tensor var_1796_epsilon_0 = const()[name = tensor("op_1796_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_1796 = rsqrt(epsilon = var_1796_epsilon_0, x = var_1795)[name = tensor("op_1796")]; tensor input_41 = mul(x = var_1793, y = var_1796)[name = tensor("input_41")]; tensor var_1800 = linear(bias = linear_0_bias_0, weight = layers_4_self_attn_q_proj_weight_palettized, x = input_41)[name = tensor("linear_28")]; tensor var_1805 = const()[name = tensor("op_1805"), val = tensor([1, 1, 16, 128])]; tensor var_1806 = reshape(shape = var_1805, x = var_1800)[name = tensor("op_1806")]; tensor var_1812 = linear(bias = linear_1_bias_0, weight = layers_4_self_attn_k_proj_weight_palettized, x = input_41)[name = tensor("linear_29")]; tensor var_1817 = const()[name = tensor("op_1817"), val = tensor([1, 1, 8, 128])]; tensor var_1818 = reshape(shape = var_1817, x = var_1812)[name = tensor("op_1818")]; tensor var_1824 = linear(bias = linear_1_bias_0, weight = layers_4_self_attn_v_proj_weight_palettized, x = input_41)[name = tensor("linear_30")]; tensor var_1837_promoted = const()[name = tensor("op_1837_promoted"), val = tensor(0x1p+1)]; tensor var_1843 = pow(x = var_1806, y = var_1837_promoted)[name = tensor("op_1843")]; tensor variance_35_keep_dims_0 = const()[name = tensor("variance_35_keep_dims_0"), val = tensor(true)]; tensor const_68 = const()[name = tensor("const_68"), val = tensor([3])]; tensor variance_35 = reduce_mean(axes = const_68, keep_dims = variance_35_keep_dims_0, x = var_1843)[name = tensor("variance_35")]; tensor const_69 = const()[name = tensor("const_69"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110225280)))]; tensor var_1847 = mul(x = const_69, y = var_1806)[name = tensor("op_1847")]; tensor var_1848 = const()[name = tensor("op_1848"), val = tensor(0x1.0c6f7ap-20)]; tensor var_1849 = add(x = variance_35, y = var_1848)[name = tensor("op_1849")]; tensor var_1850_epsilon_0 = const()[name = tensor("op_1850_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_1850 = rsqrt(epsilon = var_1850_epsilon_0, x = var_1849)[name = tensor("op_1850")]; tensor q_25 = mul(x = var_1847, y = var_1850)[name = tensor("q_25")]; tensor var_1855_promoted = const()[name = tensor("op_1855_promoted"), val = tensor(0x1p+1)]; tensor var_1861 = pow(x = var_1818, y = var_1855_promoted)[name = tensor("op_1861")]; tensor variance_37_keep_dims_0 = const()[name = tensor("variance_37_keep_dims_0"), val = tensor(true)]; tensor const_70 = const()[name = tensor("const_70"), val = tensor([3])]; tensor variance_37 = reduce_mean(axes = const_70, keep_dims = variance_37_keep_dims_0, x = var_1861)[name = tensor("variance_37")]; tensor const_71 = const()[name = tensor("const_71"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110225856)))]; tensor var_1865 = mul(x = const_71, y = var_1818)[name = tensor("op_1865")]; tensor var_1866 = const()[name = tensor("op_1866"), val = tensor(0x1.0c6f7ap-20)]; tensor var_1867 = add(x = variance_37, y = var_1866)[name = tensor("op_1867")]; tensor var_1868_epsilon_0 = const()[name = tensor("op_1868_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_1868 = rsqrt(epsilon = var_1868_epsilon_0, x = var_1867)[name = tensor("op_1868")]; tensor k_25 = mul(x = var_1865, y = var_1868)[name = tensor("k_25")]; tensor var_1883 = mul(x = q_25, y = cos_r_1)[name = tensor("op_1883")]; tensor x1_17_begin_0 = const()[name = tensor("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = tensor("x1_17_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_17_end_mask_0 = const()[name = tensor("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_25)[name = tensor("x1_17")]; tensor x2_17_begin_0 = const()[name = tensor("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_17_end_0 = const()[name = tensor("x2_17_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_17_end_mask_0 = const()[name = tensor("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_25)[name = tensor("x2_17")]; tensor const_46_promoted = const()[name = tensor("const_46_promoted"), val = tensor(-0x1p+0)]; tensor var_1904 = mul(x = x2_17, y = const_46_promoted)[name = tensor("op_1904")]; tensor var_1906 = const()[name = tensor("op_1906"), val = tensor(-1)]; tensor var_1907_interleave_0 = const()[name = tensor("op_1907_interleave_0"), val = tensor(false)]; tensor var_1907 = concat(axis = var_1906, interleave = var_1907_interleave_0, values = (var_1904, x1_17))[name = tensor("op_1907")]; tensor var_1908 = mul(x = var_1907, y = sin_r_1)[name = tensor("op_1908")]; tensor q = add(x = var_1883, y = var_1908)[name = tensor("q")]; tensor var_1911 = mul(x = k_25, y = cos_r_1)[name = tensor("op_1911")]; tensor x1_begin_0 = const()[name = tensor("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = tensor("x1_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_end_mask_0 = const()[name = tensor("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_25)[name = tensor("x1")]; tensor x2_begin_0 = const()[name = tensor("x2_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_end_0 = const()[name = tensor("x2_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_end_mask_0 = const()[name = tensor("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_25)[name = tensor("x2")]; tensor const_49_promoted = const()[name = tensor("const_49_promoted"), val = tensor(-0x1p+0)]; tensor var_1932 = mul(x = x2, y = const_49_promoted)[name = tensor("op_1932")]; tensor var_1934 = const()[name = tensor("op_1934"), val = tensor(-1)]; tensor var_1935_interleave_0 = const()[name = tensor("op_1935_interleave_0"), val = tensor(false)]; tensor var_1935 = concat(axis = var_1934, interleave = var_1935_interleave_0, values = (var_1932, x1))[name = tensor("op_1935")]; tensor var_1936 = mul(x = var_1935, y = sin_r_1)[name = tensor("op_1936")]; tensor k = add(x = var_1911, y = var_1936)[name = tensor("k")]; tensor var_1943 = const()[name = tensor("op_1943"), val = tensor([1, 1024, 1, 1])]; tensor nk = reshape(shape = var_1943, x = k)[name = tensor("nk")]; tensor var_1949 = const()[name = tensor("op_1949"), val = tensor([1, 1024, 1, 1])]; tensor nv = reshape(shape = var_1949, x = var_1824)[name = tensor("nv")]; tensor var_1954 = mul(x = var_1743, y = var_473)[name = tensor("op_1954")]; tensor var_1955 = mul(x = nk, y = update_mask)[name = tensor("op_1955")]; tensor lkc = add(x = var_1954, y = var_1955)[name = tensor("lkc")]; tensor var_1961 = mul(x = var_1763, y = var_473)[name = tensor("op_1961")]; tensor var_1962 = mul(x = nv, y = update_mask)[name = tensor("op_1962")]; tensor lvc = add(x = var_1961, y = var_1962)[name = tensor("lvc")]; tensor var_1966_axes_0 = const()[name = tensor("op_1966_axes_0"), val = tensor([2])]; tensor var_1966 = squeeze(axes = var_1966_axes_0, x = lkc)[name = tensor("op_1966")]; tensor var_1971 = const()[name = tensor("op_1971"), val = tensor([1, 8, 128, 16])]; tensor kc_17 = reshape(shape = var_1971, x = var_1966)[name = tensor("kc_17")]; tensor var_1974_axes_0 = const()[name = tensor("op_1974_axes_0"), val = tensor([2])]; tensor var_1974 = squeeze(axes = var_1974_axes_0, x = lvc)[name = tensor("op_1974")]; tensor var_1979 = const()[name = tensor("op_1979"), val = tensor([1, 8, 128, 16])]; tensor vc_17 = reshape(shape = var_1979, x = var_1974)[name = tensor("vc_17")]; tensor var_1982_axes_0 = const()[name = tensor("op_1982_axes_0"), val = tensor([2])]; tensor var_1982 = expand_dims(axes = var_1982_axes_0, x = kc_17)[name = tensor("op_1982")]; tensor var_1990_reps_0 = const()[name = tensor("op_1990_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1990 = tile(reps = var_1990_reps_0, x = var_1982)[name = tensor("op_1990")]; tensor var_1995 = const()[name = tensor("op_1995"), val = tensor([1, 16, 128, 16])]; tensor kc = reshape(shape = var_1995, x = var_1990)[name = tensor("kc")]; tensor var_1998_axes_0 = const()[name = tensor("op_1998_axes_0"), val = tensor([2])]; tensor var_1998 = expand_dims(axes = var_1998_axes_0, x = vc_17)[name = tensor("op_1998")]; tensor var_2006_reps_0 = const()[name = tensor("op_2006_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_2006 = tile(reps = var_2006_reps_0, x = var_1998)[name = tensor("op_2006")]; tensor var_2011 = const()[name = tensor("op_2011"), val = tensor([1, 16, 128, 16])]; tensor vc = reshape(shape = var_2011, x = var_2006)[name = tensor("vc")]; tensor var_2015_perm_0 = const()[name = tensor("op_2015_perm_0"), val = tensor([0, 2, -3, -1])]; tensor var_2016_transpose_x_0 = const()[name = tensor("op_2016_transpose_x_0"), val = tensor(false)]; tensor var_2016_transpose_y_0 = const()[name = tensor("op_2016_transpose_y_0"), val = tensor(false)]; tensor var_2015 = transpose(perm = var_2015_perm_0, x = q)[name = tensor("transpose_1")]; tensor var_2016 = matmul(transpose_x = var_2016_transpose_x_0, transpose_y = var_2016_transpose_y_0, x = var_2015, y = kc)[name = tensor("op_2016")]; tensor _inversed_aw_33_y_0 = const()[name = tensor("_inversed_aw_33_y_0"), val = tensor(0x1.6a09e6p-4)]; tensor _inversed_aw_33 = mul(x = var_2016, y = _inversed_aw_33_y_0)[name = tensor("_inversed_aw_33")]; tensor aw_35 = add(x = _inversed_aw_33, y = var_542)[name = tensor("aw_35")]; tensor var_2030 = const()[name = tensor("op_2030"), val = tensor(-1)]; tensor aw = softmax(axis = var_2030, x = aw_35)[name = tensor("aw")]; tensor var_2036_transpose_x_1 = const()[name = tensor("op_2036_transpose_x_1"), val = tensor(false)]; tensor var_2036_transpose_y_1 = const()[name = tensor("op_2036_transpose_y_1"), val = tensor(true)]; tensor var_2036 = matmul(transpose_x = var_2036_transpose_x_1, transpose_y = var_2036_transpose_y_1, x = aw, y = vc)[name = tensor("op_2036")]; tensor var_2039_perm_0 = const()[name = tensor("op_2039_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2043 = const()[name = tensor("op_2043"), val = tensor([1, 1, -1])]; tensor var_2039 = transpose(perm = var_2039_perm_0, x = var_2036)[name = tensor("transpose_0")]; tensor input_43 = reshape(shape = var_2043, x = var_2039)[name = tensor("input_43")]; tensor var_2047 = linear(bias = linear_1_bias_0, weight = layers_4_self_attn_o_proj_weight_palettized, x = input_43)[name = tensor("linear_31")]; tensor var_2049_axes_0 = const()[name = tensor("op_2049_axes_0"), val = tensor([0])]; tensor var_2049 = squeeze(axes = var_2049_axes_0, x = var_2047)[name = tensor("op_2049")]; tensor var_2051_axes_0 = const()[name = tensor("op_2051_axes_0"), val = tensor([0])]; tensor var_2051 = squeeze(axes = var_2051_axes_0, x = var_2049)[name = tensor("op_2051")]; tensor var_2053_axes_0 = const()[name = tensor("op_2053_axes_0"), val = tensor([-1])]; tensor var_2053 = expand_dims(axes = var_2053_axes_0, x = var_2051)[name = tensor("op_2053")]; tensor ao_axes_0 = const()[name = tensor("ao_axes_0"), val = tensor([-1])]; tensor ao = expand_dims(axes = ao_axes_0, x = var_2053)[name = tensor("ao")]; tensor hidden_17 = add(x = hidden_15, y = ao)[name = tensor("hidden_17")]; tensor var_2059_axes_0 = const()[name = tensor("op_2059_axes_0"), val = tensor([-1])]; tensor var_2059 = squeeze(axes = var_2059_axes_0, x = hidden_17)[name = tensor("op_2059")]; tensor var_2061_axes_0 = const()[name = tensor("op_2061_axes_0"), val = tensor([-1])]; tensor var_2061 = squeeze(axes = var_2061_axes_0, x = var_2059)[name = tensor("op_2061")]; tensor hidden_states_77_axes_0 = const()[name = tensor("hidden_states_77_axes_0"), val = tensor([0])]; tensor hidden_states_77 = expand_dims(axes = hidden_states_77_axes_0, x = var_2061)[name = tensor("hidden_states_77")]; tensor var_2067_promoted = const()[name = tensor("op_2067_promoted"), val = tensor(0x1p+1)]; tensor var_2073 = pow(x = hidden_states_77, y = var_2067_promoted)[name = tensor("op_2073")]; tensor variance_39_axes_0 = const()[name = tensor("variance_39_axes_0"), val = tensor([-1])]; tensor variance_39_keep_dims_0 = const()[name = tensor("variance_39_keep_dims_0"), val = tensor(true)]; tensor variance_39 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = var_2073)[name = tensor("variance_39")]; tensor const_50 = const()[name = tensor("const_50"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110226432)))]; tensor var_2077 = mul(x = const_50, y = hidden_states_77)[name = tensor("op_2077")]; tensor var_2078 = const()[name = tensor("op_2078"), val = tensor(0x1.0c6f7ap-20)]; tensor var_2079 = add(x = variance_39, y = var_2078)[name = tensor("op_2079")]; tensor var_2080_epsilon_0 = const()[name = tensor("op_2080_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_2080 = rsqrt(epsilon = var_2080_epsilon_0, x = var_2079)[name = tensor("op_2080")]; tensor input_45 = mul(x = var_2077, y = var_2080)[name = tensor("input_45")]; tensor input_47 = linear(bias = linear_4_bias_0, weight = layers_4_mlp_gate_proj_weight_palettized, x = input_45)[name = tensor("linear_32")]; tensor var_2088 = silu(x = input_47)[name = tensor("op_2088")]; tensor var_2090 = linear(bias = linear_4_bias_0, weight = layers_4_mlp_up_proj_weight_palettized, x = input_45)[name = tensor("linear_33")]; tensor input_49 = mul(x = var_2088, y = var_2090)[name = tensor("input_49")]; tensor var_2093 = linear(bias = linear_1_bias_0, weight = layers_4_mlp_down_proj_weight_palettized, x = input_49)[name = tensor("linear_34")]; tensor var_2095_axes_0 = const()[name = tensor("op_2095_axes_0"), val = tensor([0])]; tensor var_2095 = squeeze(axes = var_2095_axes_0, x = var_2093)[name = tensor("op_2095")]; tensor var_2097_axes_0 = const()[name = tensor("op_2097_axes_0"), val = tensor([0])]; tensor var_2097 = squeeze(axes = var_2097_axes_0, x = var_2095)[name = tensor("op_2097")]; tensor var_2099_axes_0 = const()[name = tensor("op_2099_axes_0"), val = tensor([-1])]; tensor var_2099 = expand_dims(axes = var_2099_axes_0, x = var_2097)[name = tensor("op_2099")]; tensor h_axes_0 = const()[name = tensor("h_axes_0"), val = tensor([-1])]; tensor h = expand_dims(axes = h_axes_0, x = var_2099)[name = tensor("h")]; tensor hidden = add(x = hidden_17, y = h)[name = tensor("hidden")]; tensor var_2105_axes_0 = const()[name = tensor("op_2105_axes_0"), val = tensor([-1])]; tensor var_2105 = squeeze(axes = var_2105_axes_0, x = hidden)[name = tensor("op_2105")]; tensor var_2107_axes_0 = const()[name = tensor("op_2107_axes_0"), val = tensor([-1])]; tensor var_2107 = squeeze(axes = var_2107_axes_0, x = var_2105)[name = tensor("op_2107")]; tensor hidden_states_81_axes_0 = const()[name = tensor("hidden_states_81_axes_0"), val = tensor([0])]; tensor hidden_states_81 = expand_dims(axes = hidden_states_81_axes_0, x = var_2107)[name = tensor("hidden_states_81")]; tensor var_2113_promoted = const()[name = tensor("op_2113_promoted"), val = tensor(0x1p+1)]; tensor var_2119 = pow(x = hidden_states_81, y = var_2113_promoted)[name = tensor("op_2119")]; tensor variance_axes_0 = const()[name = tensor("variance_axes_0"), val = tensor([-1])]; tensor variance_keep_dims_0 = const()[name = tensor("variance_keep_dims_0"), val = tensor(true)]; tensor variance = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = var_2119)[name = tensor("variance")]; tensor const_51 = const()[name = tensor("const_51"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110230592)))]; tensor var_2123 = mul(x = const_51, y = hidden_states_81)[name = tensor("op_2123")]; tensor var_2124 = const()[name = tensor("op_2124"), val = tensor(0x1.0c6f7ap-20)]; tensor var_2125 = add(x = variance, y = var_2124)[name = tensor("op_2125")]; tensor var_2126_epsilon_0 = const()[name = tensor("op_2126_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_2126 = rsqrt(epsilon = var_2126_epsilon_0, x = var_2125)[name = tensor("op_2126")]; tensor input = mul(x = var_2123, y = var_2126)[name = tensor("input")]; tensor var_2130 = linear(bias = linear_0_bias_0, weight = lm_heads_0_weight_palettized, x = input)[name = tensor("linear_35")]; tensor var_2132_axes_0 = const()[name = tensor("op_2132_axes_0"), val = tensor([1])]; tensor var_2132 = squeeze(axes = var_2132_axes_0, x = var_2130)[name = tensor("op_2132")]; tensor var_2135 = linear(bias = linear_0_bias_0, weight = lm_heads_1_weight_palettized, x = input)[name = tensor("linear_36")]; tensor var_2137_axes_0 = const()[name = tensor("op_2137_axes_0"), val = tensor([1])]; tensor var_2137 = squeeze(axes = var_2137_axes_0, x = var_2135)[name = tensor("op_2137")]; tensor var_2140 = linear(bias = linear_0_bias_0, weight = lm_heads_2_weight_palettized, x = input)[name = tensor("linear_37")]; tensor var_2142_axes_0 = const()[name = tensor("op_2142_axes_0"), val = tensor([1])]; tensor var_2142 = squeeze(axes = var_2142_axes_0, x = var_2140)[name = tensor("op_2142")]; tensor var_2145 = linear(bias = linear_0_bias_0, weight = lm_heads_3_weight_palettized, x = input)[name = tensor("linear_38")]; tensor var_2147_axes_0 = const()[name = tensor("op_2147_axes_0"), val = tensor([1])]; tensor var_2147 = squeeze(axes = var_2147_axes_0, x = var_2145)[name = tensor("op_2147")]; tensor var_2150 = linear(bias = linear_0_bias_0, weight = lm_heads_4_weight_palettized, x = input)[name = tensor("linear_39")]; tensor var_2152_axes_0 = const()[name = tensor("op_2152_axes_0"), val = tensor([1])]; tensor var_2152 = squeeze(axes = var_2152_axes_0, x = var_2150)[name = tensor("op_2152")]; tensor var_2155 = linear(bias = linear_0_bias_0, weight = lm_heads_5_weight_palettized, x = input)[name = tensor("linear_40")]; tensor var_2157_axes_0 = const()[name = tensor("op_2157_axes_0"), val = tensor([1])]; tensor var_2157 = squeeze(axes = var_2157_axes_0, x = var_2155)[name = tensor("op_2157")]; tensor var_2160 = linear(bias = linear_0_bias_0, weight = lm_heads_6_weight_palettized, x = input)[name = tensor("linear_41")]; tensor var_2162_axes_0 = const()[name = tensor("op_2162_axes_0"), val = tensor([1])]; tensor var_2162 = squeeze(axes = var_2162_axes_0, x = var_2160)[name = tensor("op_2162")]; tensor var_2165 = linear(bias = linear_0_bias_0, weight = lm_heads_7_weight_palettized, x = input)[name = tensor("linear_42")]; tensor var_2167_axes_0 = const()[name = tensor("op_2167_axes_0"), val = tensor([1])]; tensor var_2167 = squeeze(axes = var_2167_axes_0, x = var_2165)[name = tensor("op_2167")]; tensor var_2170 = linear(bias = linear_0_bias_0, weight = lm_heads_8_weight_palettized, x = input)[name = tensor("linear_43")]; tensor var_2172_axes_0 = const()[name = tensor("op_2172_axes_0"), val = tensor([1])]; tensor var_2172 = squeeze(axes = var_2172_axes_0, x = var_2170)[name = tensor("op_2172")]; tensor var_2175 = linear(bias = linear_0_bias_0, weight = lm_heads_9_weight_palettized, x = input)[name = tensor("linear_44")]; tensor var_2177_axes_0 = const()[name = tensor("op_2177_axes_0"), val = tensor([1])]; tensor var_2177 = squeeze(axes = var_2177_axes_0, x = var_2175)[name = tensor("op_2177")]; tensor var_2180 = linear(bias = linear_0_bias_0, weight = lm_heads_10_weight_palettized, x = input)[name = tensor("linear_45")]; tensor var_2182_axes_0 = const()[name = tensor("op_2182_axes_0"), val = tensor([1])]; tensor var_2182 = squeeze(axes = var_2182_axes_0, x = var_2180)[name = tensor("op_2182")]; tensor var_2185 = linear(bias = linear_0_bias_0, weight = lm_heads_11_weight_palettized, x = input)[name = tensor("linear_46")]; tensor var_2187_axes_0 = const()[name = tensor("op_2187_axes_0"), val = tensor([1])]; tensor var_2187 = squeeze(axes = var_2187_axes_0, x = var_2185)[name = tensor("op_2187")]; tensor var_2190 = linear(bias = linear_0_bias_0, weight = lm_heads_12_weight_palettized, x = input)[name = tensor("linear_47")]; tensor var_2192_axes_0 = const()[name = tensor("op_2192_axes_0"), val = tensor([1])]; tensor var_2192 = squeeze(axes = var_2192_axes_0, x = var_2190)[name = tensor("op_2192")]; tensor var_2195 = linear(bias = linear_0_bias_0, weight = lm_heads_13_weight_palettized, x = input)[name = tensor("linear_48")]; tensor var_2197_axes_0 = const()[name = tensor("op_2197_axes_0"), val = tensor([1])]; tensor var_2197 = squeeze(axes = var_2197_axes_0, x = var_2195)[name = tensor("op_2197")]; tensor var_2200 = linear(bias = linear_0_bias_0, weight = lm_heads_14_weight_palettized, x = input)[name = tensor("linear_49")]; tensor var_2202_axes_0 = const()[name = tensor("op_2202_axes_0"), val = tensor([1])]; tensor var_2202 = squeeze(axes = var_2202_axes_0, x = var_2200)[name = tensor("op_2202")]; tensor var_2205_axis_0 = const()[name = tensor("op_2205_axis_0"), val = tensor(1)]; tensor all_logits_type_fp32 = stack(axis = var_2205_axis_0, values = (var_2132, var_2137, var_2142, var_2147, var_2152, var_2157, var_2162, var_2167, var_2172, var_2177, var_2182, var_2187, var_2192, var_2197, var_2202))[name = tensor("op_2205")]; tensor var_2207_axes_0 = const()[name = tensor("op_2207_axes_0"), val = tensor([0])]; tensor var_2207 = squeeze(axes = var_2207_axes_0, x = input)[name = tensor("op_2207")]; tensor var_2209_axes_0 = const()[name = tensor("op_2209_axes_0"), val = tensor([-1])]; tensor var_2209 = expand_dims(axes = var_2209_axes_0, x = var_2207)[name = tensor("op_2209")]; tensor var_2211_axes_0 = const()[name = tensor("op_2211_axes_0"), val = tensor([-1])]; tensor hidden_states_type_fp32 = expand_dims(axes = var_2211_axes_0, x = var_2209)[name = tensor("op_2211")]; tensor var_2213 = const()[name = tensor("op_2213"), val = tensor(1)]; tensor new_kv_k_interleave_0 = const()[name = tensor("new_kv_k_interleave_0"), val = tensor(false)]; tensor new_kv_k = concat(axis = var_2213, interleave = new_kv_k_interleave_0, values = (nk_1, nk_3, nk_5, nk_7, nk))[name = tensor("new_kv_k")]; tensor var_2216 = const()[name = tensor("op_2216"), val = tensor(1)]; tensor new_kv_v_interleave_0 = const()[name = tensor("new_kv_v_interleave_0"), val = tensor(false)]; tensor new_kv_v = concat(axis = var_2216, interleave = new_kv_v_interleave_0, values = (nv_1, nv_3, nv_5, nv_7, nv))[name = tensor("new_kv_v")]; tensor var_2221 = mul(x = cast_1, y = var_473)[name = tensor("op_2221")]; tensor var_2222 = mul(x = new_kv_k, y = update_mask)[name = tensor("op_2222")]; tensor new_key_cache_type_fp32 = add(x = var_2221, y = var_2222)[name = tensor("op_2224")]; tensor var_2228 = mul(x = cast_4, y = var_473)[name = tensor("op_2228")]; tensor var_2229 = mul(x = new_kv_v, y = update_mask)[name = tensor("op_2229")]; tensor new_value_cache_type_fp32 = add(x = var_2228, y = var_2229)[name = tensor("op_2231")]; tensor cast_65_dtype_0 = const()[name = tensor("cast_65_dtype_0"), val = tensor("fp16")]; tensor cast_66_dtype_0 = const()[name = tensor("cast_66_dtype_0"), val = tensor("fp16")]; tensor cast_67_dtype_0 = const()[name = tensor("cast_67_dtype_0"), val = tensor("fp16")]; tensor cast_68_dtype_0 = const()[name = tensor("cast_68_dtype_0"), val = tensor("fp16")]; tensor all_logits = cast(dtype = cast_65_dtype_0, x = all_logits_type_fp32)[name = tensor("cast_0")]; tensor hidden_states = cast(dtype = cast_66_dtype_0, x = hidden_states_type_fp32)[name = tensor("cast_1")]; tensor new_key_cache = cast(dtype = cast_67_dtype_0, x = new_key_cache_type_fp32)[name = tensor("cast_2")]; tensor new_value_cache = cast(dtype = cast_68_dtype_0, x = new_value_cache_type_fp32)[name = tensor("cast_3")]; } -> (all_logits, hidden_states, new_key_cache, new_value_cache); }