program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}})] { func main(tensor cache_length, tensor input_embeds, tensor key_cache, tensor key_padding_mask, tensor kv_cache_update_mask, tensor value_cache) { string cast_0_dtype_0 = const()[name = string("cast_0_dtype_0"), val = string("fp32")]; string cast_1_dtype_0 = const()[name = string("cast_1_dtype_0"), val = string("fp32")]; string cast_2_dtype_0 = const()[name = string("cast_2_dtype_0"), val = string("fp32")]; string cast_3_dtype_0 = const()[name = string("cast_3_dtype_0"), val = string("fp32")]; string cast_4_dtype_0 = const()[name = string("cast_4_dtype_0"), val = string("fp32")]; tensor layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2097280))))[name = string("layers_0_self_attn_q_proj_weight_palettized")]; tensor layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2098368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3147008))))[name = string("layers_0_self_attn_k_proj_weight_palettized")]; tensor layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3148096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4196736))))[name = string("layers_0_self_attn_v_proj_weight_palettized")]; tensor layers_0_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4197824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6295040))))[name = string("layers_0_self_attn_o_proj_weight_palettized")]; tensor layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6296128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9441920))))[name = string("layers_0_mlp_gate_proj_weight_palettized")]; tensor layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9443008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12588800))))[name = string("layers_0_mlp_up_proj_weight_palettized")]; tensor layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12589888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15735680))))[name = string("layers_0_mlp_down_proj_weight_palettized")]; tensor layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15736768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17833984))))[name = string("layers_1_self_attn_q_proj_weight_palettized")]; tensor layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17835072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18883712))))[name = string("layers_1_self_attn_k_proj_weight_palettized")]; tensor layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18884800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19933440))))[name = string("layers_1_self_attn_v_proj_weight_palettized")]; tensor layers_1_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19934528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22031744))))[name = string("layers_1_self_attn_o_proj_weight_palettized")]; tensor layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22032832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25178624))))[name = string("layers_1_mlp_gate_proj_weight_palettized")]; tensor layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25179712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28325504))))[name = string("layers_1_mlp_up_proj_weight_palettized")]; tensor layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28326592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31472384))))[name = string("layers_1_mlp_down_proj_weight_palettized")]; tensor layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31473472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33570688))))[name = string("layers_2_self_attn_q_proj_weight_palettized")]; tensor layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33571776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34620416))))[name = string("layers_2_self_attn_k_proj_weight_palettized")]; tensor layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34621504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35670144))))[name = string("layers_2_self_attn_v_proj_weight_palettized")]; tensor layers_2_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35671232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37768448))))[name = string("layers_2_self_attn_o_proj_weight_palettized")]; tensor layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37769536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40915328))))[name = string("layers_2_mlp_gate_proj_weight_palettized")]; tensor layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40916416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44062208))))[name = string("layers_2_mlp_up_proj_weight_palettized")]; tensor layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44063296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47209088))))[name = string("layers_2_mlp_down_proj_weight_palettized")]; tensor layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47210176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49307392))))[name = string("layers_3_self_attn_q_proj_weight_palettized")]; tensor layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49308480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50357120))))[name = string("layers_3_self_attn_k_proj_weight_palettized")]; tensor layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50358208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51406848))))[name = string("layers_3_self_attn_v_proj_weight_palettized")]; tensor layers_3_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51407936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53505152))))[name = string("layers_3_self_attn_o_proj_weight_palettized")]; tensor layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53506240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56652032))))[name = string("layers_3_mlp_gate_proj_weight_palettized")]; tensor layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56653120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59798912))))[name = string("layers_3_mlp_up_proj_weight_palettized")]; tensor layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59800000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62945792))))[name = string("layers_3_mlp_down_proj_weight_palettized")]; tensor layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62946880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65044096))))[name = string("layers_4_self_attn_q_proj_weight_palettized")]; tensor layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65045184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66093824))))[name = string("layers_4_self_attn_k_proj_weight_palettized")]; tensor layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66094912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67143552))))[name = string("layers_4_self_attn_v_proj_weight_palettized")]; tensor layers_4_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67144640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69241856))))[name = string("layers_4_self_attn_o_proj_weight_palettized")]; tensor layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69242944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72388736))))[name = string("layers_4_mlp_gate_proj_weight_palettized")]; tensor layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72389824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75535616))))[name = string("layers_4_mlp_up_proj_weight_palettized")]; tensor layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75536704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78682496))))[name = string("layers_4_mlp_down_proj_weight_palettized")]; tensor lm_heads_0_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78683584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80780800))))[name = string("lm_heads_0_weight_palettized")]; tensor lm_heads_1_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80781888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82879104))))[name = string("lm_heads_1_weight_palettized")]; tensor lm_heads_2_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82880192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84977408))))[name = string("lm_heads_2_weight_palettized")]; tensor lm_heads_3_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84978496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87075712))))[name = string("lm_heads_3_weight_palettized")]; tensor lm_heads_4_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87076800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89174016))))[name = string("lm_heads_4_weight_palettized")]; tensor lm_heads_5_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89175104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91272320))))[name = string("lm_heads_5_weight_palettized")]; tensor lm_heads_6_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91273408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93370624))))[name = string("lm_heads_6_weight_palettized")]; tensor lm_heads_7_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93371712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95468928))))[name = string("lm_heads_7_weight_palettized")]; tensor lm_heads_8_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95470016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97567232))))[name = string("lm_heads_8_weight_palettized")]; tensor lm_heads_9_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97568320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99665536))))[name = string("lm_heads_9_weight_palettized")]; tensor lm_heads_10_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99666624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101763840))))[name = string("lm_heads_10_weight_palettized")]; tensor lm_heads_11_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101764928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103862144))))[name = string("lm_heads_11_weight_palettized")]; tensor lm_heads_12_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103863232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105960448))))[name = string("lm_heads_12_weight_palettized")]; tensor lm_heads_13_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105961536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108058752))))[name = string("lm_heads_13_weight_palettized")]; tensor lm_heads_14_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108059840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110157056))))[name = string("lm_heads_14_weight_palettized")]; tensor var_205_axes_0 = const()[name = string("op_205_axes_0"), val = tensor([0])]; tensor var_205 = expand_dims(axes = var_205_axes_0, x = cache_length)[name = string("op_205")]; string position_ids_dtype_0 = const()[name = string("position_ids_dtype_0"), val = string("fp32")]; tensor const_0 = const()[name = string("const_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110158144)))]; tensor var_226_axes_0 = const()[name = string("op_226_axes_0"), val = tensor([-1])]; tensor position_ids = cast(dtype = position_ids_dtype_0, x = var_205)[name = string("cast_9")]; tensor var_226 = expand_dims(axes = var_226_axes_0, x = position_ids)[name = string("op_226")]; bool var_227_transpose_x_0 = const()[name = string("op_227_transpose_x_0"), val = bool(false)]; bool var_227_transpose_y_0 = const()[name = string("op_227_transpose_y_0"), val = bool(false)]; tensor var_227 = matmul(transpose_x = var_227_transpose_x_0, transpose_y = var_227_transpose_y_0, x = const_0, y = var_226)[name = string("op_227")]; tensor freqs_perm_0 = const()[name = string("freqs_perm_0"), val = tensor([0, 2, 1])]; int32 var_232 = const()[name = string("op_232"), val = int32(-1)]; bool emb_interleave_0 = const()[name = string("emb_interleave_0"), val = bool(false)]; tensor freqs = transpose(perm = freqs_perm_0, x = var_227)[name = string("transpose_20")]; tensor emb = concat(axis = var_232, interleave = emb_interleave_0, values = (freqs, freqs))[name = string("emb")]; tensor var_234 = cos(x = emb)[name = string("op_234")]; tensor var_242 = sin(x = emb)[name = string("op_242")]; tensor var_259_begin_0 = const()[name = string("op_259_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_259_end_0 = const()[name = string("op_259_end_0"), val = tensor([1, 1024, 1, 16])]; tensor var_259_end_mask_0 = const()[name = string("op_259_end_mask_0"), val = tensor([true, false, true, true])]; tensor cast_1 = cast(dtype = cast_1_dtype_0, x = key_cache)[name = string("cast_8")]; tensor var_259 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = cast_1)[name = string("op_259")]; tensor var_279_begin_0 = const()[name = string("op_279_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_279_end_0 = const()[name = string("op_279_end_0"), val = tensor([1, 1024, 1, 16])]; tensor var_279_end_mask_0 = const()[name = string("op_279_end_mask_0"), val = tensor([true, false, true, true])]; tensor cast_4 = cast(dtype = cast_4_dtype_0, x = value_cache)[name = string("cast_7")]; tensor var_279 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = cast_4)[name = string("op_279")]; tensor var_291_axes_0 = const()[name = string("op_291_axes_0"), val = tensor([-1])]; tensor cast_0 = cast(dtype = cast_0_dtype_0, x = input_embeds)[name = string("cast_6")]; tensor var_291 = squeeze(axes = var_291_axes_0, x = cast_0)[name = string("op_291")]; tensor var_293_axes_0 = const()[name = string("op_293_axes_0"), val = tensor([-1])]; tensor var_293 = squeeze(axes = var_293_axes_0, x = var_291)[name = string("op_293")]; tensor hidden_states_1_axes_0 = const()[name = string("hidden_states_1_axes_0"), val = tensor([0])]; tensor hidden_states_1 = expand_dims(axes = hidden_states_1_axes_0, x = var_293)[name = string("hidden_states_1")]; fp32 var_299_promoted = const()[name = string("op_299_promoted"), val = fp32(0x1p+1)]; tensor var_305 = pow(x = hidden_states_1, y = var_299_promoted)[name = string("op_305")]; tensor variance_1_axes_0 = const()[name = string("variance_1_axes_0"), val = tensor([-1])]; bool variance_1_keep_dims_0 = const()[name = string("variance_1_keep_dims_0"), val = bool(true)]; tensor variance_1 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = var_305)[name = string("variance_1")]; fp32 var_308 = const()[name = string("op_308"), val = fp32(0x1.0c6f7ap-20)]; tensor var_309 = add(x = variance_1, y = var_308)[name = string("op_309")]; fp32 var_310_epsilon_0 = const()[name = string("op_310_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_310 = rsqrt(epsilon = var_310_epsilon_0, x = var_309)[name = string("op_310")]; tensor hidden_states_5 = mul(x = hidden_states_1, y = var_310)[name = string("hidden_states_5")]; tensor const_1 = const()[name = string("const_1"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110158464)))]; tensor input_1 = mul(x = const_1, y = hidden_states_5)[name = string("input_1")]; tensor linear_0_bias_0 = const()[name = string("linear_0_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110162624)))]; tensor var_316 = linear(bias = linear_0_bias_0, weight = layers_0_self_attn_q_proj_weight_palettized, x = input_1)[name = string("linear_0")]; tensor var_321 = const()[name = string("op_321"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_7 = reshape(shape = var_321, x = var_316)[name = string("hidden_states_7")]; tensor linear_1_bias_0 = const()[name = string("linear_1_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110170880)))]; tensor var_325 = linear(bias = linear_1_bias_0, weight = layers_0_self_attn_k_proj_weight_palettized, x = input_1)[name = string("linear_1")]; tensor var_330 = const()[name = string("op_330"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_13 = reshape(shape = var_330, x = var_325)[name = string("hidden_states_13")]; tensor var_334 = linear(bias = linear_1_bias_0, weight = layers_0_self_attn_v_proj_weight_palettized, x = input_1)[name = string("linear_2")]; tensor var_339 = const()[name = string("op_339"), val = tensor([1, 1, 8, 128])]; tensor v_1 = reshape(shape = var_339, x = var_334)[name = string("v_1")]; fp32 var_344_promoted = const()[name = string("op_344_promoted"), val = fp32(0x1p+1)]; tensor var_350 = pow(x = hidden_states_7, y = var_344_promoted)[name = string("op_350")]; tensor variance_3_axes_0 = const()[name = string("variance_3_axes_0"), val = tensor([-1])]; bool variance_3_keep_dims_0 = const()[name = string("variance_3_keep_dims_0"), val = bool(true)]; tensor variance_3 = reduce_mean(axes = variance_3_axes_0, keep_dims = variance_3_keep_dims_0, x = var_350)[name = string("variance_3")]; fp32 var_353 = const()[name = string("op_353"), val = fp32(0x1.0c6f7ap-20)]; tensor var_354 = add(x = variance_3, y = var_353)[name = string("op_354")]; fp32 var_355_epsilon_0 = const()[name = string("op_355_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_355 = rsqrt(epsilon = var_355_epsilon_0, x = var_354)[name = string("op_355")]; tensor hidden_states_11 = mul(x = hidden_states_7, y = var_355)[name = string("hidden_states_11")]; tensor const_2 = const()[name = string("const_2"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110175040)))]; tensor q_1 = mul(x = const_2, y = hidden_states_11)[name = string("q_1")]; fp32 var_362_promoted = const()[name = string("op_362_promoted"), val = fp32(0x1p+1)]; tensor var_368 = pow(x = hidden_states_13, y = var_362_promoted)[name = string("op_368")]; tensor variance_5_axes_0 = const()[name = string("variance_5_axes_0"), val = tensor([-1])]; bool variance_5_keep_dims_0 = const()[name = string("variance_5_keep_dims_0"), val = bool(true)]; tensor variance_5 = reduce_mean(axes = variance_5_axes_0, keep_dims = variance_5_keep_dims_0, x = var_368)[name = string("variance_5")]; fp32 var_371 = const()[name = string("op_371"), val = fp32(0x1.0c6f7ap-20)]; tensor var_372 = add(x = variance_5, y = var_371)[name = string("op_372")]; fp32 var_373_epsilon_0 = const()[name = string("op_373_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_373 = rsqrt(epsilon = var_373_epsilon_0, x = var_372)[name = string("op_373")]; tensor hidden_states_17 = mul(x = hidden_states_13, y = var_373)[name = string("hidden_states_17")]; tensor const_3 = const()[name = string("const_3"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110175616)))]; tensor k_1 = mul(x = const_3, y = hidden_states_17)[name = string("k_1")]; tensor q_3_perm_0 = const()[name = string("q_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_3_perm_0 = const()[name = string("k_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_3_perm_0 = const()[name = string("v_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor cos_r_1_axes_0 = const()[name = string("cos_r_1_axes_0"), val = tensor([1])]; tensor cos_r_1 = expand_dims(axes = cos_r_1_axes_0, x = var_234)[name = string("cos_r_1")]; tensor sin_r_1_axes_0 = const()[name = string("sin_r_1_axes_0"), val = tensor([1])]; tensor sin_r_1 = expand_dims(axes = sin_r_1_axes_0, x = var_242)[name = string("sin_r_1")]; tensor q_3 = transpose(perm = q_3_perm_0, x = q_1)[name = string("transpose_19")]; tensor var_390 = mul(x = q_3, y = cos_r_1)[name = string("op_390")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_3)[name = string("x1_1")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_3)[name = string("x2_1")]; fp32 const_6_promoted = const()[name = string("const_6_promoted"), val = fp32(-0x1p+0)]; tensor var_411 = mul(x = x2_1, y = const_6_promoted)[name = string("op_411")]; int32 var_413 = const()[name = string("op_413"), val = int32(-1)]; bool var_414_interleave_0 = const()[name = string("op_414_interleave_0"), val = bool(false)]; tensor var_414 = concat(axis = var_413, interleave = var_414_interleave_0, values = (var_411, x1_1))[name = string("op_414")]; tensor var_415 = mul(x = var_414, y = sin_r_1)[name = string("op_415")]; tensor q_5 = add(x = var_390, y = var_415)[name = string("q_5")]; tensor k_3 = transpose(perm = k_3_perm_0, x = k_1)[name = string("transpose_18")]; tensor var_418 = mul(x = k_3, y = cos_r_1)[name = string("op_418")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_3)[name = string("x1_3")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_3)[name = string("x2_3")]; fp32 const_9_promoted = const()[name = string("const_9_promoted"), val = fp32(-0x1p+0)]; tensor var_439 = mul(x = x2_3, y = const_9_promoted)[name = string("op_439")]; int32 var_441 = const()[name = string("op_441"), val = int32(-1)]; bool var_442_interleave_0 = const()[name = string("op_442_interleave_0"), val = bool(false)]; tensor var_442 = concat(axis = var_441, interleave = var_442_interleave_0, values = (var_439, x1_3))[name = string("op_442")]; tensor var_443 = mul(x = var_442, y = sin_r_1)[name = string("op_443")]; tensor k_5 = add(x = var_418, y = var_443)[name = string("k_5")]; tensor var_450 = const()[name = string("op_450"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_1 = reshape(shape = var_450, x = k_5)[name = string("nk_flat_1")]; tensor var_456 = const()[name = string("op_456"), val = tensor([1, 1024, 1, 1])]; tensor v_3 = transpose(perm = v_3_perm_0, x = v_1)[name = string("transpose_17")]; tensor nv_flat_1 = reshape(shape = var_456, x = v_3)[name = string("nv_flat_1")]; tensor var_459_axes_0 = const()[name = string("op_459_axes_0"), val = tensor([1])]; tensor cast_3 = cast(dtype = cast_3_dtype_0, x = kv_cache_update_mask)[name = string("cast_5")]; tensor var_459 = expand_dims(axes = var_459_axes_0, x = cast_3)[name = string("op_459")]; tensor update_mask_1_axes_0 = const()[name = string("update_mask_1_axes_0"), val = tensor([2])]; tensor update_mask_1 = expand_dims(axes = update_mask_1_axes_0, x = var_459)[name = string("update_mask_1")]; fp32 var_462 = const()[name = string("op_462"), val = fp32(0x1p+0)]; tensor var_464 = sub(x = var_462, y = update_mask_1)[name = string("op_464")]; tensor var_465 = mul(x = var_259, y = var_464)[name = string("op_465")]; tensor var_466 = mul(x = nk_flat_1, y = update_mask_1)[name = string("op_466")]; tensor key_cache_5 = add(x = var_465, y = var_466)[name = string("key_cache_5")]; tensor var_472 = mul(x = var_279, y = var_464)[name = string("op_472")]; tensor var_473 = mul(x = nv_flat_1, y = update_mask_1)[name = string("op_473")]; tensor value_cache_5 = add(x = var_472, y = var_473)[name = string("value_cache_5")]; tensor var_477_axes_0 = const()[name = string("op_477_axes_0"), val = tensor([2])]; tensor var_477 = squeeze(axes = var_477_axes_0, x = key_cache_5)[name = string("op_477")]; tensor var_482 = const()[name = string("op_482"), val = tensor([1, 8, 128, 16])]; tensor kc_1 = reshape(shape = var_482, x = var_477)[name = string("kc_1")]; tensor var_485_axes_0 = const()[name = string("op_485_axes_0"), val = tensor([2])]; tensor var_485 = squeeze(axes = var_485_axes_0, x = value_cache_5)[name = string("op_485")]; tensor var_490 = const()[name = string("op_490"), val = tensor([1, 8, 128, 16])]; tensor vc_1 = reshape(shape = var_490, x = var_485)[name = string("vc_1")]; tensor var_493_axes_0 = const()[name = string("op_493_axes_0"), val = tensor([2])]; tensor var_493 = expand_dims(axes = var_493_axes_0, x = kc_1)[name = string("op_493")]; tensor var_501_reps_0 = const()[name = string("op_501_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_501 = tile(reps = var_501_reps_0, x = var_493)[name = string("op_501")]; tensor var_506 = const()[name = string("op_506"), val = tensor([1, 16, 128, 16])]; tensor kc_3 = reshape(shape = var_506, x = var_501)[name = string("kc_3")]; tensor var_509_axes_0 = const()[name = string("op_509_axes_0"), val = tensor([2])]; tensor var_509 = expand_dims(axes = var_509_axes_0, x = vc_1)[name = string("op_509")]; tensor var_517_reps_0 = const()[name = string("op_517_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_517 = tile(reps = var_517_reps_0, x = var_509)[name = string("op_517")]; tensor var_522 = const()[name = string("op_522"), val = tensor([1, 16, 128, 16])]; tensor vc_3 = reshape(shape = var_522, x = var_517)[name = string("vc_3")]; bool var_524_transpose_x_0 = const()[name = string("op_524_transpose_x_0"), val = bool(false)]; bool var_524_transpose_y_0 = const()[name = string("op_524_transpose_y_0"), val = bool(false)]; tensor var_524 = matmul(transpose_x = var_524_transpose_x_0, transpose_y = var_524_transpose_y_0, x = q_5, y = kc_3)[name = string("op_524")]; fp32 _inversed_attn_weights_1_y_0 = const()[name = string("_inversed_attn_weights_1_y_0"), val = fp32(0x1.6a09e6p-4)]; tensor _inversed_attn_weights_1 = mul(x = var_524, y = _inversed_attn_weights_1_y_0)[name = string("_inversed_attn_weights_1")]; tensor var_528_axes_0 = const()[name = string("op_528_axes_0"), val = tensor([1])]; tensor cast_2 = cast(dtype = cast_2_dtype_0, x = key_padding_mask)[name = string("cast_4")]; tensor var_528 = expand_dims(axes = var_528_axes_0, x = cast_2)[name = string("op_528")]; tensor mask_1_axes_0 = const()[name = string("mask_1_axes_0"), val = tensor([2])]; tensor mask_1 = expand_dims(axes = mask_1_axes_0, x = var_528)[name = string("mask_1")]; tensor attn_weights_3 = add(x = _inversed_attn_weights_1, y = mask_1)[name = string("attn_weights_3")]; int32 var_538 = const()[name = string("op_538"), val = int32(-1)]; tensor attn_weights_7 = softmax(axis = var_538, x = attn_weights_3)[name = string("attn_weights_7")]; bool attn_output_1_transpose_x_1 = const()[name = string("attn_output_1_transpose_x_1"), val = bool(false)]; bool attn_output_1_transpose_y_1 = const()[name = string("attn_output_1_transpose_y_1"), val = bool(true)]; tensor attn_output_1 = matmul(transpose_x = attn_output_1_transpose_x_1, transpose_y = attn_output_1_transpose_y_1, x = attn_weights_7, y = vc_3)[name = string("attn_output_1")]; tensor var_547_perm_0 = const()[name = string("op_547_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_551 = const()[name = string("op_551"), val = tensor([1, 1, -1])]; tensor var_547 = transpose(perm = var_547_perm_0, x = attn_output_1)[name = string("transpose_16")]; tensor input_3 = reshape(shape = var_551, x = var_547)[name = string("input_3")]; tensor attn_output_3 = linear(bias = linear_1_bias_0, weight = layers_0_self_attn_o_proj_weight_palettized, x = input_3)[name = string("linear_3")]; tensor var_557_axes_0 = const()[name = string("op_557_axes_0"), val = tensor([0])]; tensor var_557 = squeeze(axes = var_557_axes_0, x = attn_output_3)[name = string("op_557")]; tensor var_559_axes_0 = const()[name = string("op_559_axes_0"), val = tensor([0])]; tensor var_559 = squeeze(axes = var_559_axes_0, x = var_557)[name = string("op_559")]; tensor var_561_axes_0 = const()[name = string("op_561_axes_0"), val = tensor([-1])]; tensor var_561 = expand_dims(axes = var_561_axes_0, x = var_559)[name = string("op_561")]; tensor attn_4d_1_axes_0 = const()[name = string("attn_4d_1_axes_0"), val = tensor([-1])]; tensor attn_4d_1 = expand_dims(axes = attn_4d_1_axes_0, x = var_561)[name = string("attn_4d_1")]; tensor hidden_1 = add(x = cast_0, y = attn_4d_1)[name = string("hidden_1")]; tensor var_567_axes_0 = const()[name = string("op_567_axes_0"), val = tensor([-1])]; tensor var_567 = squeeze(axes = var_567_axes_0, x = hidden_1)[name = string("op_567")]; tensor var_569_axes_0 = const()[name = string("op_569_axes_0"), val = tensor([-1])]; tensor var_569 = squeeze(axes = var_569_axes_0, x = var_567)[name = string("op_569")]; tensor hidden_states_19_axes_0 = const()[name = string("hidden_states_19_axes_0"), val = tensor([0])]; tensor hidden_states_19 = expand_dims(axes = hidden_states_19_axes_0, x = var_569)[name = string("hidden_states_19")]; fp32 var_575_promoted = const()[name = string("op_575_promoted"), val = fp32(0x1p+1)]; tensor var_581 = pow(x = hidden_states_19, y = var_575_promoted)[name = string("op_581")]; tensor variance_7_axes_0 = const()[name = string("variance_7_axes_0"), val = tensor([-1])]; bool variance_7_keep_dims_0 = const()[name = string("variance_7_keep_dims_0"), val = bool(true)]; tensor variance_7 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = var_581)[name = string("variance_7")]; fp32 var_584 = const()[name = string("op_584"), val = fp32(0x1.0c6f7ap-20)]; tensor var_585 = add(x = variance_7, y = var_584)[name = string("op_585")]; fp32 var_586_epsilon_0 = const()[name = string("op_586_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_586 = rsqrt(epsilon = var_586_epsilon_0, x = var_585)[name = string("op_586")]; tensor hidden_states_23 = mul(x = hidden_states_19, y = var_586)[name = string("hidden_states_23")]; tensor const_10 = const()[name = string("const_10"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110176192)))]; tensor input_5 = mul(x = const_10, y = hidden_states_23)[name = string("input_5")]; tensor linear_4_bias_0 = const()[name = string("linear_4_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110180352)))]; tensor input_7 = linear(bias = linear_4_bias_0, weight = layers_0_mlp_gate_proj_weight_palettized, x = input_5)[name = string("linear_4")]; tensor var_596 = silu(x = input_7)[name = string("op_596")]; tensor var_598 = linear(bias = linear_4_bias_0, weight = layers_0_mlp_up_proj_weight_palettized, x = input_5)[name = string("linear_5")]; tensor input_9 = mul(x = var_596, y = var_598)[name = string("input_9")]; tensor mlp_out_1 = linear(bias = linear_1_bias_0, weight = layers_0_mlp_down_proj_weight_palettized, x = input_9)[name = string("linear_6")]; tensor var_603_axes_0 = const()[name = string("op_603_axes_0"), val = tensor([0])]; tensor var_603 = squeeze(axes = var_603_axes_0, x = mlp_out_1)[name = string("op_603")]; tensor var_605_axes_0 = const()[name = string("op_605_axes_0"), val = tensor([0])]; tensor var_605 = squeeze(axes = var_605_axes_0, x = var_603)[name = string("op_605")]; tensor var_607_axes_0 = const()[name = string("op_607_axes_0"), val = tensor([-1])]; tensor var_607 = expand_dims(axes = var_607_axes_0, x = var_605)[name = string("op_607")]; tensor mlp_4d_1_axes_0 = const()[name = string("mlp_4d_1_axes_0"), val = tensor([-1])]; tensor mlp_4d_1 = expand_dims(axes = mlp_4d_1_axes_0, x = var_607)[name = string("mlp_4d_1")]; tensor hidden_3 = add(x = hidden_1, y = mlp_4d_1)[name = string("hidden_3")]; tensor var_621_begin_0 = const()[name = string("op_621_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_621_end_0 = const()[name = string("op_621_end_0"), val = tensor([1, 2048, 1, 16])]; tensor var_621_end_mask_0 = const()[name = string("op_621_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_621 = slice_by_index(begin = var_621_begin_0, end = var_621_end_0, end_mask = var_621_end_mask_0, x = cast_1)[name = string("op_621")]; tensor var_641_begin_0 = const()[name = string("op_641_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_641_end_0 = const()[name = string("op_641_end_0"), val = tensor([1, 2048, 1, 16])]; tensor var_641_end_mask_0 = const()[name = string("op_641_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_641 = slice_by_index(begin = var_641_begin_0, end = var_641_end_0, end_mask = var_641_end_mask_0, x = cast_4)[name = string("op_641")]; tensor var_653_axes_0 = const()[name = string("op_653_axes_0"), val = tensor([-1])]; tensor var_653 = squeeze(axes = var_653_axes_0, x = hidden_3)[name = string("op_653")]; tensor var_655_axes_0 = const()[name = string("op_655_axes_0"), val = tensor([-1])]; tensor var_655 = squeeze(axes = var_655_axes_0, x = var_653)[name = string("op_655")]; tensor hidden_states_25_axes_0 = const()[name = string("hidden_states_25_axes_0"), val = tensor([0])]; tensor hidden_states_25 = expand_dims(axes = hidden_states_25_axes_0, x = var_655)[name = string("hidden_states_25")]; fp32 var_661_promoted = const()[name = string("op_661_promoted"), val = fp32(0x1p+1)]; tensor var_667 = pow(x = hidden_states_25, y = var_661_promoted)[name = string("op_667")]; tensor variance_9_axes_0 = const()[name = string("variance_9_axes_0"), val = tensor([-1])]; bool variance_9_keep_dims_0 = const()[name = string("variance_9_keep_dims_0"), val = bool(true)]; tensor variance_9 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = var_667)[name = string("variance_9")]; fp32 var_670 = const()[name = string("op_670"), val = fp32(0x1.0c6f7ap-20)]; tensor var_671 = add(x = variance_9, y = var_670)[name = string("op_671")]; fp32 var_672_epsilon_0 = const()[name = string("op_672_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_672 = rsqrt(epsilon = var_672_epsilon_0, x = var_671)[name = string("op_672")]; tensor hidden_states_29 = mul(x = hidden_states_25, y = var_672)[name = string("hidden_states_29")]; tensor const_11 = const()[name = string("const_11"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110192704)))]; tensor input_11 = mul(x = const_11, y = hidden_states_29)[name = string("input_11")]; tensor var_678 = linear(bias = linear_0_bias_0, weight = layers_1_self_attn_q_proj_weight_palettized, x = input_11)[name = string("linear_7")]; tensor var_683 = const()[name = string("op_683"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_31 = reshape(shape = var_683, x = var_678)[name = string("hidden_states_31")]; tensor var_687 = linear(bias = linear_1_bias_0, weight = layers_1_self_attn_k_proj_weight_palettized, x = input_11)[name = string("linear_8")]; tensor var_692 = const()[name = string("op_692"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_37 = reshape(shape = var_692, x = var_687)[name = string("hidden_states_37")]; tensor var_696 = linear(bias = linear_1_bias_0, weight = layers_1_self_attn_v_proj_weight_palettized, x = input_11)[name = string("linear_9")]; tensor var_701 = const()[name = string("op_701"), val = tensor([1, 1, 8, 128])]; tensor v_5 = reshape(shape = var_701, x = var_696)[name = string("v_5")]; fp32 var_706_promoted = const()[name = string("op_706_promoted"), val = fp32(0x1p+1)]; tensor var_712 = pow(x = hidden_states_31, y = var_706_promoted)[name = string("op_712")]; tensor variance_11_axes_0 = const()[name = string("variance_11_axes_0"), val = tensor([-1])]; bool variance_11_keep_dims_0 = const()[name = string("variance_11_keep_dims_0"), val = bool(true)]; tensor variance_11 = reduce_mean(axes = variance_11_axes_0, keep_dims = variance_11_keep_dims_0, x = var_712)[name = string("variance_11")]; fp32 var_715 = const()[name = string("op_715"), val = fp32(0x1.0c6f7ap-20)]; tensor var_716 = add(x = variance_11, y = var_715)[name = string("op_716")]; fp32 var_717_epsilon_0 = const()[name = string("op_717_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_717 = rsqrt(epsilon = var_717_epsilon_0, x = var_716)[name = string("op_717")]; tensor hidden_states_35 = mul(x = hidden_states_31, y = var_717)[name = string("hidden_states_35")]; tensor const_12 = const()[name = string("const_12"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110196864)))]; tensor q_7 = mul(x = const_12, y = hidden_states_35)[name = string("q_7")]; fp32 var_724_promoted = const()[name = string("op_724_promoted"), val = fp32(0x1p+1)]; tensor var_730 = pow(x = hidden_states_37, y = var_724_promoted)[name = string("op_730")]; tensor variance_13_axes_0 = const()[name = string("variance_13_axes_0"), val = tensor([-1])]; bool variance_13_keep_dims_0 = const()[name = string("variance_13_keep_dims_0"), val = bool(true)]; tensor variance_13 = reduce_mean(axes = variance_13_axes_0, keep_dims = variance_13_keep_dims_0, x = var_730)[name = string("variance_13")]; fp32 var_733 = const()[name = string("op_733"), val = fp32(0x1.0c6f7ap-20)]; tensor var_734 = add(x = variance_13, y = var_733)[name = string("op_734")]; fp32 var_735_epsilon_0 = const()[name = string("op_735_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_735 = rsqrt(epsilon = var_735_epsilon_0, x = var_734)[name = string("op_735")]; tensor hidden_states_41 = mul(x = hidden_states_37, y = var_735)[name = string("hidden_states_41")]; tensor const_13 = const()[name = string("const_13"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110197440)))]; tensor k_7 = mul(x = const_13, y = hidden_states_41)[name = string("k_7")]; tensor q_9_perm_0 = const()[name = string("q_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_9_perm_0 = const()[name = string("k_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_7_perm_0 = const()[name = string("v_7_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_9 = transpose(perm = q_9_perm_0, x = q_7)[name = string("transpose_15")]; tensor var_752 = mul(x = q_9, y = cos_r_1)[name = string("op_752")]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_9)[name = string("x1_5")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_9)[name = string("x2_5")]; fp32 const_16_promoted = const()[name = string("const_16_promoted"), val = fp32(-0x1p+0)]; tensor var_773 = mul(x = x2_5, y = const_16_promoted)[name = string("op_773")]; int32 var_775 = const()[name = string("op_775"), val = int32(-1)]; bool var_776_interleave_0 = const()[name = string("op_776_interleave_0"), val = bool(false)]; tensor var_776 = concat(axis = var_775, interleave = var_776_interleave_0, values = (var_773, x1_5))[name = string("op_776")]; tensor var_777 = mul(x = var_776, y = sin_r_1)[name = string("op_777")]; tensor q_11 = add(x = var_752, y = var_777)[name = string("q_11")]; tensor k_9 = transpose(perm = k_9_perm_0, x = k_7)[name = string("transpose_14")]; tensor var_780 = mul(x = k_9, y = cos_r_1)[name = string("op_780")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_9)[name = string("x1_7")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_9)[name = string("x2_7")]; fp32 const_19_promoted = const()[name = string("const_19_promoted"), val = fp32(-0x1p+0)]; tensor var_801 = mul(x = x2_7, y = const_19_promoted)[name = string("op_801")]; int32 var_803 = const()[name = string("op_803"), val = int32(-1)]; bool var_804_interleave_0 = const()[name = string("op_804_interleave_0"), val = bool(false)]; tensor var_804 = concat(axis = var_803, interleave = var_804_interleave_0, values = (var_801, x1_7))[name = string("op_804")]; tensor var_805 = mul(x = var_804, y = sin_r_1)[name = string("op_805")]; tensor k_11 = add(x = var_780, y = var_805)[name = string("k_11")]; tensor var_812 = const()[name = string("op_812"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_3 = reshape(shape = var_812, x = k_11)[name = string("nk_flat_3")]; tensor var_818 = const()[name = string("op_818"), val = tensor([1, 1024, 1, 1])]; tensor v_7 = transpose(perm = v_7_perm_0, x = v_5)[name = string("transpose_13")]; tensor nv_flat_3 = reshape(shape = var_818, x = v_7)[name = string("nv_flat_3")]; tensor var_827 = mul(x = var_621, y = var_464)[name = string("op_827")]; tensor var_828 = mul(x = nk_flat_3, y = update_mask_1)[name = string("op_828")]; tensor key_cache_9 = add(x = var_827, y = var_828)[name = string("key_cache_9")]; tensor var_834 = mul(x = var_641, y = var_464)[name = string("op_834")]; tensor var_835 = mul(x = nv_flat_3, y = update_mask_1)[name = string("op_835")]; tensor value_cache_9 = add(x = var_834, y = var_835)[name = string("value_cache_9")]; tensor var_839_axes_0 = const()[name = string("op_839_axes_0"), val = tensor([2])]; tensor var_839 = squeeze(axes = var_839_axes_0, x = key_cache_9)[name = string("op_839")]; tensor var_844 = const()[name = string("op_844"), val = tensor([1, 8, 128, 16])]; tensor kc_5 = reshape(shape = var_844, x = var_839)[name = string("kc_5")]; tensor var_847_axes_0 = const()[name = string("op_847_axes_0"), val = tensor([2])]; tensor var_847 = squeeze(axes = var_847_axes_0, x = value_cache_9)[name = string("op_847")]; tensor var_852 = const()[name = string("op_852"), val = tensor([1, 8, 128, 16])]; tensor vc_5 = reshape(shape = var_852, x = var_847)[name = string("vc_5")]; tensor var_855_axes_0 = const()[name = string("op_855_axes_0"), val = tensor([2])]; tensor var_855 = expand_dims(axes = var_855_axes_0, x = kc_5)[name = string("op_855")]; tensor var_863_reps_0 = const()[name = string("op_863_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_863 = tile(reps = var_863_reps_0, x = var_855)[name = string("op_863")]; tensor var_868 = const()[name = string("op_868"), val = tensor([1, 16, 128, 16])]; tensor kc_7 = reshape(shape = var_868, x = var_863)[name = string("kc_7")]; tensor var_871_axes_0 = const()[name = string("op_871_axes_0"), val = tensor([2])]; tensor var_871 = expand_dims(axes = var_871_axes_0, x = vc_5)[name = string("op_871")]; tensor var_879_reps_0 = const()[name = string("op_879_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_879 = tile(reps = var_879_reps_0, x = var_871)[name = string("op_879")]; tensor var_884 = const()[name = string("op_884"), val = tensor([1, 16, 128, 16])]; tensor vc_7 = reshape(shape = var_884, x = var_879)[name = string("vc_7")]; bool var_886_transpose_x_0 = const()[name = string("op_886_transpose_x_0"), val = bool(false)]; bool var_886_transpose_y_0 = const()[name = string("op_886_transpose_y_0"), val = bool(false)]; tensor var_886 = matmul(transpose_x = var_886_transpose_x_0, transpose_y = var_886_transpose_y_0, x = q_11, y = kc_7)[name = string("op_886")]; fp32 _inversed_attn_weights_9_y_0 = const()[name = string("_inversed_attn_weights_9_y_0"), val = fp32(0x1.6a09e6p-4)]; tensor _inversed_attn_weights_9 = mul(x = var_886, y = _inversed_attn_weights_9_y_0)[name = string("_inversed_attn_weights_9")]; tensor attn_weights_11 = add(x = _inversed_attn_weights_9, y = mask_1)[name = string("attn_weights_11")]; int32 var_900 = const()[name = string("op_900"), val = int32(-1)]; tensor attn_weights_15 = softmax(axis = var_900, x = attn_weights_11)[name = string("attn_weights_15")]; bool attn_output_5_transpose_x_1 = const()[name = string("attn_output_5_transpose_x_1"), val = bool(false)]; bool attn_output_5_transpose_y_1 = const()[name = string("attn_output_5_transpose_y_1"), val = bool(true)]; tensor attn_output_5 = matmul(transpose_x = attn_output_5_transpose_x_1, transpose_y = attn_output_5_transpose_y_1, x = attn_weights_15, y = vc_7)[name = string("attn_output_5")]; tensor var_909_perm_0 = const()[name = string("op_909_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_913 = const()[name = string("op_913"), val = tensor([1, 1, -1])]; tensor var_909 = transpose(perm = var_909_perm_0, x = attn_output_5)[name = string("transpose_12")]; tensor input_13 = reshape(shape = var_913, x = var_909)[name = string("input_13")]; tensor attn_output_7 = linear(bias = linear_1_bias_0, weight = layers_1_self_attn_o_proj_weight_palettized, x = input_13)[name = string("linear_10")]; tensor var_919_axes_0 = const()[name = string("op_919_axes_0"), val = tensor([0])]; tensor var_919 = squeeze(axes = var_919_axes_0, x = attn_output_7)[name = string("op_919")]; tensor var_921_axes_0 = const()[name = string("op_921_axes_0"), val = tensor([0])]; tensor var_921 = squeeze(axes = var_921_axes_0, x = var_919)[name = string("op_921")]; tensor var_923_axes_0 = const()[name = string("op_923_axes_0"), val = tensor([-1])]; tensor var_923 = expand_dims(axes = var_923_axes_0, x = var_921)[name = string("op_923")]; tensor attn_4d_3_axes_0 = const()[name = string("attn_4d_3_axes_0"), val = tensor([-1])]; tensor attn_4d_3 = expand_dims(axes = attn_4d_3_axes_0, x = var_923)[name = string("attn_4d_3")]; tensor hidden_5 = add(x = hidden_3, y = attn_4d_3)[name = string("hidden_5")]; tensor var_929_axes_0 = const()[name = string("op_929_axes_0"), val = tensor([-1])]; tensor var_929 = squeeze(axes = var_929_axes_0, x = hidden_5)[name = string("op_929")]; tensor var_931_axes_0 = const()[name = string("op_931_axes_0"), val = tensor([-1])]; tensor var_931 = squeeze(axes = var_931_axes_0, x = var_929)[name = string("op_931")]; tensor hidden_states_43_axes_0 = const()[name = string("hidden_states_43_axes_0"), val = tensor([0])]; tensor hidden_states_43 = expand_dims(axes = hidden_states_43_axes_0, x = var_931)[name = string("hidden_states_43")]; fp32 var_937_promoted = const()[name = string("op_937_promoted"), val = fp32(0x1p+1)]; tensor var_943 = pow(x = hidden_states_43, y = var_937_promoted)[name = string("op_943")]; tensor variance_15_axes_0 = const()[name = string("variance_15_axes_0"), val = tensor([-1])]; bool variance_15_keep_dims_0 = const()[name = string("variance_15_keep_dims_0"), val = bool(true)]; tensor variance_15 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = var_943)[name = string("variance_15")]; fp32 var_946 = const()[name = string("op_946"), val = fp32(0x1.0c6f7ap-20)]; tensor var_947 = add(x = variance_15, y = var_946)[name = string("op_947")]; fp32 var_948_epsilon_0 = const()[name = string("op_948_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_948 = rsqrt(epsilon = var_948_epsilon_0, x = var_947)[name = string("op_948")]; tensor hidden_states_47 = mul(x = hidden_states_43, y = var_948)[name = string("hidden_states_47")]; tensor const_20 = const()[name = string("const_20"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110198016)))]; tensor input_15 = mul(x = const_20, y = hidden_states_47)[name = string("input_15")]; tensor input_17 = linear(bias = linear_4_bias_0, weight = layers_1_mlp_gate_proj_weight_palettized, x = input_15)[name = string("linear_11")]; tensor var_958 = silu(x = input_17)[name = string("op_958")]; tensor var_960 = linear(bias = linear_4_bias_0, weight = layers_1_mlp_up_proj_weight_palettized, x = input_15)[name = string("linear_12")]; tensor input_19 = mul(x = var_958, y = var_960)[name = string("input_19")]; tensor mlp_out_3 = linear(bias = linear_1_bias_0, weight = layers_1_mlp_down_proj_weight_palettized, x = input_19)[name = string("linear_13")]; tensor var_965_axes_0 = const()[name = string("op_965_axes_0"), val = tensor([0])]; tensor var_965 = squeeze(axes = var_965_axes_0, x = mlp_out_3)[name = string("op_965")]; tensor var_967_axes_0 = const()[name = string("op_967_axes_0"), val = tensor([0])]; tensor var_967 = squeeze(axes = var_967_axes_0, x = var_965)[name = string("op_967")]; tensor var_969_axes_0 = const()[name = string("op_969_axes_0"), val = tensor([-1])]; tensor var_969 = expand_dims(axes = var_969_axes_0, x = var_967)[name = string("op_969")]; tensor mlp_4d_3_axes_0 = const()[name = string("mlp_4d_3_axes_0"), val = tensor([-1])]; tensor mlp_4d_3 = expand_dims(axes = mlp_4d_3_axes_0, x = var_969)[name = string("mlp_4d_3")]; tensor hidden_7 = add(x = hidden_5, y = mlp_4d_3)[name = string("hidden_7")]; tensor var_983_begin_0 = const()[name = string("op_983_begin_0"), val = tensor([0, 2048, 0, 0])]; tensor var_983_end_0 = const()[name = string("op_983_end_0"), val = tensor([1, 3072, 1, 16])]; tensor var_983_end_mask_0 = const()[name = string("op_983_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_983 = slice_by_index(begin = var_983_begin_0, end = var_983_end_0, end_mask = var_983_end_mask_0, x = cast_1)[name = string("op_983")]; tensor var_1003_begin_0 = const()[name = string("op_1003_begin_0"), val = tensor([0, 2048, 0, 0])]; tensor var_1003_end_0 = const()[name = string("op_1003_end_0"), val = tensor([1, 3072, 1, 16])]; tensor var_1003_end_mask_0 = const()[name = string("op_1003_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1003 = slice_by_index(begin = var_1003_begin_0, end = var_1003_end_0, end_mask = var_1003_end_mask_0, x = cast_4)[name = string("op_1003")]; tensor var_1015_axes_0 = const()[name = string("op_1015_axes_0"), val = tensor([-1])]; tensor var_1015 = squeeze(axes = var_1015_axes_0, x = hidden_7)[name = string("op_1015")]; tensor var_1017_axes_0 = const()[name = string("op_1017_axes_0"), val = tensor([-1])]; tensor var_1017 = squeeze(axes = var_1017_axes_0, x = var_1015)[name = string("op_1017")]; tensor hidden_states_49_axes_0 = const()[name = string("hidden_states_49_axes_0"), val = tensor([0])]; tensor hidden_states_49 = expand_dims(axes = hidden_states_49_axes_0, x = var_1017)[name = string("hidden_states_49")]; fp32 var_1023_promoted = const()[name = string("op_1023_promoted"), val = fp32(0x1p+1)]; tensor var_1029 = pow(x = hidden_states_49, y = var_1023_promoted)[name = string("op_1029")]; tensor variance_17_axes_0 = const()[name = string("variance_17_axes_0"), val = tensor([-1])]; bool variance_17_keep_dims_0 = const()[name = string("variance_17_keep_dims_0"), val = bool(true)]; tensor variance_17 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = var_1029)[name = string("variance_17")]; fp32 var_1032 = const()[name = string("op_1032"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1033 = add(x = variance_17, y = var_1032)[name = string("op_1033")]; fp32 var_1034_epsilon_0 = const()[name = string("op_1034_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1034 = rsqrt(epsilon = var_1034_epsilon_0, x = var_1033)[name = string("op_1034")]; tensor hidden_states_53 = mul(x = hidden_states_49, y = var_1034)[name = string("hidden_states_53")]; tensor const_21 = const()[name = string("const_21"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110202176)))]; tensor input_21 = mul(x = const_21, y = hidden_states_53)[name = string("input_21")]; tensor var_1040 = linear(bias = linear_0_bias_0, weight = layers_2_self_attn_q_proj_weight_palettized, x = input_21)[name = string("linear_14")]; tensor var_1045 = const()[name = string("op_1045"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_55 = reshape(shape = var_1045, x = var_1040)[name = string("hidden_states_55")]; tensor var_1049 = linear(bias = linear_1_bias_0, weight = layers_2_self_attn_k_proj_weight_palettized, x = input_21)[name = string("linear_15")]; tensor var_1054 = const()[name = string("op_1054"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_61 = reshape(shape = var_1054, x = var_1049)[name = string("hidden_states_61")]; tensor var_1058 = linear(bias = linear_1_bias_0, weight = layers_2_self_attn_v_proj_weight_palettized, x = input_21)[name = string("linear_16")]; tensor var_1063 = const()[name = string("op_1063"), val = tensor([1, 1, 8, 128])]; tensor v_9 = reshape(shape = var_1063, x = var_1058)[name = string("v_9")]; fp32 var_1068_promoted = const()[name = string("op_1068_promoted"), val = fp32(0x1p+1)]; tensor var_1074 = pow(x = hidden_states_55, y = var_1068_promoted)[name = string("op_1074")]; tensor variance_19_axes_0 = const()[name = string("variance_19_axes_0"), val = tensor([-1])]; bool variance_19_keep_dims_0 = const()[name = string("variance_19_keep_dims_0"), val = bool(true)]; tensor variance_19 = reduce_mean(axes = variance_19_axes_0, keep_dims = variance_19_keep_dims_0, x = var_1074)[name = string("variance_19")]; fp32 var_1077 = const()[name = string("op_1077"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1078 = add(x = variance_19, y = var_1077)[name = string("op_1078")]; fp32 var_1079_epsilon_0 = const()[name = string("op_1079_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1079 = rsqrt(epsilon = var_1079_epsilon_0, x = var_1078)[name = string("op_1079")]; tensor hidden_states_59 = mul(x = hidden_states_55, y = var_1079)[name = string("hidden_states_59")]; tensor const_22 = const()[name = string("const_22"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110206336)))]; tensor q_13 = mul(x = const_22, y = hidden_states_59)[name = string("q_13")]; fp32 var_1086_promoted = const()[name = string("op_1086_promoted"), val = fp32(0x1p+1)]; tensor var_1092 = pow(x = hidden_states_61, y = var_1086_promoted)[name = string("op_1092")]; tensor variance_21_axes_0 = const()[name = string("variance_21_axes_0"), val = tensor([-1])]; bool variance_21_keep_dims_0 = const()[name = string("variance_21_keep_dims_0"), val = bool(true)]; tensor variance_21 = reduce_mean(axes = variance_21_axes_0, keep_dims = variance_21_keep_dims_0, x = var_1092)[name = string("variance_21")]; fp32 var_1095 = const()[name = string("op_1095"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1096 = add(x = variance_21, y = var_1095)[name = string("op_1096")]; fp32 var_1097_epsilon_0 = const()[name = string("op_1097_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1097 = rsqrt(epsilon = var_1097_epsilon_0, x = var_1096)[name = string("op_1097")]; tensor hidden_states_65 = mul(x = hidden_states_61, y = var_1097)[name = string("hidden_states_65")]; tensor const_23 = const()[name = string("const_23"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110206912)))]; tensor k_13 = mul(x = const_23, y = hidden_states_65)[name = string("k_13")]; tensor q_15_perm_0 = const()[name = string("q_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_15_perm_0 = const()[name = string("k_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_11_perm_0 = const()[name = string("v_11_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_15 = transpose(perm = q_15_perm_0, x = q_13)[name = string("transpose_11")]; tensor var_1114 = mul(x = q_15, y = cos_r_1)[name = string("op_1114")]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_15)[name = string("x1_9")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_15)[name = string("x2_9")]; fp32 const_26_promoted = const()[name = string("const_26_promoted"), val = fp32(-0x1p+0)]; tensor var_1135 = mul(x = x2_9, y = const_26_promoted)[name = string("op_1135")]; int32 var_1137 = const()[name = string("op_1137"), val = int32(-1)]; bool var_1138_interleave_0 = const()[name = string("op_1138_interleave_0"), val = bool(false)]; tensor var_1138 = concat(axis = var_1137, interleave = var_1138_interleave_0, values = (var_1135, x1_9))[name = string("op_1138")]; tensor var_1139 = mul(x = var_1138, y = sin_r_1)[name = string("op_1139")]; tensor q_17 = add(x = var_1114, y = var_1139)[name = string("q_17")]; tensor k_15 = transpose(perm = k_15_perm_0, x = k_13)[name = string("transpose_10")]; tensor var_1142 = mul(x = k_15, y = cos_r_1)[name = string("op_1142")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_15)[name = string("x1_11")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_15)[name = string("x2_11")]; fp32 const_29_promoted = const()[name = string("const_29_promoted"), val = fp32(-0x1p+0)]; tensor var_1163 = mul(x = x2_11, y = const_29_promoted)[name = string("op_1163")]; int32 var_1165 = const()[name = string("op_1165"), val = int32(-1)]; bool var_1166_interleave_0 = const()[name = string("op_1166_interleave_0"), val = bool(false)]; tensor var_1166 = concat(axis = var_1165, interleave = var_1166_interleave_0, values = (var_1163, x1_11))[name = string("op_1166")]; tensor var_1167 = mul(x = var_1166, y = sin_r_1)[name = string("op_1167")]; tensor k_17 = add(x = var_1142, y = var_1167)[name = string("k_17")]; tensor var_1174 = const()[name = string("op_1174"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_5 = reshape(shape = var_1174, x = k_17)[name = string("nk_flat_5")]; tensor var_1180 = const()[name = string("op_1180"), val = tensor([1, 1024, 1, 1])]; tensor v_11 = transpose(perm = v_11_perm_0, x = v_9)[name = string("transpose_9")]; tensor nv_flat_5 = reshape(shape = var_1180, x = v_11)[name = string("nv_flat_5")]; tensor var_1189 = mul(x = var_983, y = var_464)[name = string("op_1189")]; tensor var_1190 = mul(x = nk_flat_5, y = update_mask_1)[name = string("op_1190")]; tensor key_cache_13 = add(x = var_1189, y = var_1190)[name = string("key_cache_13")]; tensor var_1196 = mul(x = var_1003, y = var_464)[name = string("op_1196")]; tensor var_1197 = mul(x = nv_flat_5, y = update_mask_1)[name = string("op_1197")]; tensor value_cache_13 = add(x = var_1196, y = var_1197)[name = string("value_cache_13")]; tensor var_1201_axes_0 = const()[name = string("op_1201_axes_0"), val = tensor([2])]; tensor var_1201 = squeeze(axes = var_1201_axes_0, x = key_cache_13)[name = string("op_1201")]; tensor var_1206 = const()[name = string("op_1206"), val = tensor([1, 8, 128, 16])]; tensor kc_9 = reshape(shape = var_1206, x = var_1201)[name = string("kc_9")]; tensor var_1209_axes_0 = const()[name = string("op_1209_axes_0"), val = tensor([2])]; tensor var_1209 = squeeze(axes = var_1209_axes_0, x = value_cache_13)[name = string("op_1209")]; tensor var_1214 = const()[name = string("op_1214"), val = tensor([1, 8, 128, 16])]; tensor vc_9 = reshape(shape = var_1214, x = var_1209)[name = string("vc_9")]; tensor var_1217_axes_0 = const()[name = string("op_1217_axes_0"), val = tensor([2])]; tensor var_1217 = expand_dims(axes = var_1217_axes_0, x = kc_9)[name = string("op_1217")]; tensor var_1225_reps_0 = const()[name = string("op_1225_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1225 = tile(reps = var_1225_reps_0, x = var_1217)[name = string("op_1225")]; tensor var_1230 = const()[name = string("op_1230"), val = tensor([1, 16, 128, 16])]; tensor kc_11 = reshape(shape = var_1230, x = var_1225)[name = string("kc_11")]; tensor var_1233_axes_0 = const()[name = string("op_1233_axes_0"), val = tensor([2])]; tensor var_1233 = expand_dims(axes = var_1233_axes_0, x = vc_9)[name = string("op_1233")]; tensor var_1241_reps_0 = const()[name = string("op_1241_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1241 = tile(reps = var_1241_reps_0, x = var_1233)[name = string("op_1241")]; tensor var_1246 = const()[name = string("op_1246"), val = tensor([1, 16, 128, 16])]; tensor vc_11 = reshape(shape = var_1246, x = var_1241)[name = string("vc_11")]; bool var_1248_transpose_x_0 = const()[name = string("op_1248_transpose_x_0"), val = bool(false)]; bool var_1248_transpose_y_0 = const()[name = string("op_1248_transpose_y_0"), val = bool(false)]; tensor var_1248 = matmul(transpose_x = var_1248_transpose_x_0, transpose_y = var_1248_transpose_y_0, x = q_17, y = kc_11)[name = string("op_1248")]; fp32 _inversed_attn_weights_17_y_0 = const()[name = string("_inversed_attn_weights_17_y_0"), val = fp32(0x1.6a09e6p-4)]; tensor _inversed_attn_weights_17 = mul(x = var_1248, y = _inversed_attn_weights_17_y_0)[name = string("_inversed_attn_weights_17")]; tensor attn_weights_19 = add(x = _inversed_attn_weights_17, y = mask_1)[name = string("attn_weights_19")]; int32 var_1262 = const()[name = string("op_1262"), val = int32(-1)]; tensor attn_weights_23 = softmax(axis = var_1262, x = attn_weights_19)[name = string("attn_weights_23")]; bool attn_output_9_transpose_x_1 = const()[name = string("attn_output_9_transpose_x_1"), val = bool(false)]; bool attn_output_9_transpose_y_1 = const()[name = string("attn_output_9_transpose_y_1"), val = bool(true)]; tensor attn_output_9 = matmul(transpose_x = attn_output_9_transpose_x_1, transpose_y = attn_output_9_transpose_y_1, x = attn_weights_23, y = vc_11)[name = string("attn_output_9")]; tensor var_1271_perm_0 = const()[name = string("op_1271_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1275 = const()[name = string("op_1275"), val = tensor([1, 1, -1])]; tensor var_1271 = transpose(perm = var_1271_perm_0, x = attn_output_9)[name = string("transpose_8")]; tensor input_23 = reshape(shape = var_1275, x = var_1271)[name = string("input_23")]; tensor attn_output_11 = linear(bias = linear_1_bias_0, weight = layers_2_self_attn_o_proj_weight_palettized, x = input_23)[name = string("linear_17")]; tensor var_1281_axes_0 = const()[name = string("op_1281_axes_0"), val = tensor([0])]; tensor var_1281 = squeeze(axes = var_1281_axes_0, x = attn_output_11)[name = string("op_1281")]; tensor var_1283_axes_0 = const()[name = string("op_1283_axes_0"), val = tensor([0])]; tensor var_1283 = squeeze(axes = var_1283_axes_0, x = var_1281)[name = string("op_1283")]; tensor var_1285_axes_0 = const()[name = string("op_1285_axes_0"), val = tensor([-1])]; tensor var_1285 = expand_dims(axes = var_1285_axes_0, x = var_1283)[name = string("op_1285")]; tensor attn_4d_5_axes_0 = const()[name = string("attn_4d_5_axes_0"), val = tensor([-1])]; tensor attn_4d_5 = expand_dims(axes = attn_4d_5_axes_0, x = var_1285)[name = string("attn_4d_5")]; tensor hidden_9 = add(x = hidden_7, y = attn_4d_5)[name = string("hidden_9")]; tensor var_1291_axes_0 = const()[name = string("op_1291_axes_0"), val = tensor([-1])]; tensor var_1291 = squeeze(axes = var_1291_axes_0, x = hidden_9)[name = string("op_1291")]; tensor var_1293_axes_0 = const()[name = string("op_1293_axes_0"), val = tensor([-1])]; tensor var_1293 = squeeze(axes = var_1293_axes_0, x = var_1291)[name = string("op_1293")]; tensor hidden_states_67_axes_0 = const()[name = string("hidden_states_67_axes_0"), val = tensor([0])]; tensor hidden_states_67 = expand_dims(axes = hidden_states_67_axes_0, x = var_1293)[name = string("hidden_states_67")]; fp32 var_1299_promoted = const()[name = string("op_1299_promoted"), val = fp32(0x1p+1)]; tensor var_1305 = pow(x = hidden_states_67, y = var_1299_promoted)[name = string("op_1305")]; tensor variance_23_axes_0 = const()[name = string("variance_23_axes_0"), val = tensor([-1])]; bool variance_23_keep_dims_0 = const()[name = string("variance_23_keep_dims_0"), val = bool(true)]; tensor variance_23 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = var_1305)[name = string("variance_23")]; fp32 var_1308 = const()[name = string("op_1308"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1309 = add(x = variance_23, y = var_1308)[name = string("op_1309")]; fp32 var_1310_epsilon_0 = const()[name = string("op_1310_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1310 = rsqrt(epsilon = var_1310_epsilon_0, x = var_1309)[name = string("op_1310")]; tensor hidden_states_71 = mul(x = hidden_states_67, y = var_1310)[name = string("hidden_states_71")]; tensor const_30 = const()[name = string("const_30"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110207488)))]; tensor input_25 = mul(x = const_30, y = hidden_states_71)[name = string("input_25")]; tensor input_27 = linear(bias = linear_4_bias_0, weight = layers_2_mlp_gate_proj_weight_palettized, x = input_25)[name = string("linear_18")]; tensor var_1320 = silu(x = input_27)[name = string("op_1320")]; tensor var_1322 = linear(bias = linear_4_bias_0, weight = layers_2_mlp_up_proj_weight_palettized, x = input_25)[name = string("linear_19")]; tensor input_29 = mul(x = var_1320, y = var_1322)[name = string("input_29")]; tensor mlp_out_5 = linear(bias = linear_1_bias_0, weight = layers_2_mlp_down_proj_weight_palettized, x = input_29)[name = string("linear_20")]; tensor var_1327_axes_0 = const()[name = string("op_1327_axes_0"), val = tensor([0])]; tensor var_1327 = squeeze(axes = var_1327_axes_0, x = mlp_out_5)[name = string("op_1327")]; tensor var_1329_axes_0 = const()[name = string("op_1329_axes_0"), val = tensor([0])]; tensor var_1329 = squeeze(axes = var_1329_axes_0, x = var_1327)[name = string("op_1329")]; tensor var_1331_axes_0 = const()[name = string("op_1331_axes_0"), val = tensor([-1])]; tensor var_1331 = expand_dims(axes = var_1331_axes_0, x = var_1329)[name = string("op_1331")]; tensor mlp_4d_5_axes_0 = const()[name = string("mlp_4d_5_axes_0"), val = tensor([-1])]; tensor mlp_4d_5 = expand_dims(axes = mlp_4d_5_axes_0, x = var_1331)[name = string("mlp_4d_5")]; tensor hidden_11 = add(x = hidden_9, y = mlp_4d_5)[name = string("hidden_11")]; tensor var_1345_begin_0 = const()[name = string("op_1345_begin_0"), val = tensor([0, 3072, 0, 0])]; tensor var_1345_end_0 = const()[name = string("op_1345_end_0"), val = tensor([1, 4096, 1, 16])]; tensor var_1345_end_mask_0 = const()[name = string("op_1345_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1345 = slice_by_index(begin = var_1345_begin_0, end = var_1345_end_0, end_mask = var_1345_end_mask_0, x = cast_1)[name = string("op_1345")]; tensor var_1365_begin_0 = const()[name = string("op_1365_begin_0"), val = tensor([0, 3072, 0, 0])]; tensor var_1365_end_0 = const()[name = string("op_1365_end_0"), val = tensor([1, 4096, 1, 16])]; tensor var_1365_end_mask_0 = const()[name = string("op_1365_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1365 = slice_by_index(begin = var_1365_begin_0, end = var_1365_end_0, end_mask = var_1365_end_mask_0, x = cast_4)[name = string("op_1365")]; tensor var_1377_axes_0 = const()[name = string("op_1377_axes_0"), val = tensor([-1])]; tensor var_1377 = squeeze(axes = var_1377_axes_0, x = hidden_11)[name = string("op_1377")]; tensor var_1379_axes_0 = const()[name = string("op_1379_axes_0"), val = tensor([-1])]; tensor var_1379 = squeeze(axes = var_1379_axes_0, x = var_1377)[name = string("op_1379")]; tensor hidden_states_73_axes_0 = const()[name = string("hidden_states_73_axes_0"), val = tensor([0])]; tensor hidden_states_73 = expand_dims(axes = hidden_states_73_axes_0, x = var_1379)[name = string("hidden_states_73")]; fp32 var_1385_promoted = const()[name = string("op_1385_promoted"), val = fp32(0x1p+1)]; tensor var_1391 = pow(x = hidden_states_73, y = var_1385_promoted)[name = string("op_1391")]; tensor variance_25_axes_0 = const()[name = string("variance_25_axes_0"), val = tensor([-1])]; bool variance_25_keep_dims_0 = const()[name = string("variance_25_keep_dims_0"), val = bool(true)]; tensor variance_25 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = var_1391)[name = string("variance_25")]; fp32 var_1394 = const()[name = string("op_1394"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1395 = add(x = variance_25, y = var_1394)[name = string("op_1395")]; fp32 var_1396_epsilon_0 = const()[name = string("op_1396_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1396 = rsqrt(epsilon = var_1396_epsilon_0, x = var_1395)[name = string("op_1396")]; tensor hidden_states_77 = mul(x = hidden_states_73, y = var_1396)[name = string("hidden_states_77")]; tensor const_31 = const()[name = string("const_31"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110211648)))]; tensor input_31 = mul(x = const_31, y = hidden_states_77)[name = string("input_31")]; tensor var_1402 = linear(bias = linear_0_bias_0, weight = layers_3_self_attn_q_proj_weight_palettized, x = input_31)[name = string("linear_21")]; tensor var_1407 = const()[name = string("op_1407"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_79 = reshape(shape = var_1407, x = var_1402)[name = string("hidden_states_79")]; tensor var_1411 = linear(bias = linear_1_bias_0, weight = layers_3_self_attn_k_proj_weight_palettized, x = input_31)[name = string("linear_22")]; tensor var_1416 = const()[name = string("op_1416"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_85 = reshape(shape = var_1416, x = var_1411)[name = string("hidden_states_85")]; tensor var_1420 = linear(bias = linear_1_bias_0, weight = layers_3_self_attn_v_proj_weight_palettized, x = input_31)[name = string("linear_23")]; tensor var_1425 = const()[name = string("op_1425"), val = tensor([1, 1, 8, 128])]; tensor v_13 = reshape(shape = var_1425, x = var_1420)[name = string("v_13")]; fp32 var_1430_promoted = const()[name = string("op_1430_promoted"), val = fp32(0x1p+1)]; tensor var_1436 = pow(x = hidden_states_79, y = var_1430_promoted)[name = string("op_1436")]; tensor variance_27_axes_0 = const()[name = string("variance_27_axes_0"), val = tensor([-1])]; bool variance_27_keep_dims_0 = const()[name = string("variance_27_keep_dims_0"), val = bool(true)]; tensor variance_27 = reduce_mean(axes = variance_27_axes_0, keep_dims = variance_27_keep_dims_0, x = var_1436)[name = string("variance_27")]; fp32 var_1439 = const()[name = string("op_1439"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1440 = add(x = variance_27, y = var_1439)[name = string("op_1440")]; fp32 var_1441_epsilon_0 = const()[name = string("op_1441_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1441 = rsqrt(epsilon = var_1441_epsilon_0, x = var_1440)[name = string("op_1441")]; tensor hidden_states_83 = mul(x = hidden_states_79, y = var_1441)[name = string("hidden_states_83")]; tensor const_32 = const()[name = string("const_32"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110215808)))]; tensor q_19 = mul(x = const_32, y = hidden_states_83)[name = string("q_19")]; fp32 var_1448_promoted = const()[name = string("op_1448_promoted"), val = fp32(0x1p+1)]; tensor var_1454 = pow(x = hidden_states_85, y = var_1448_promoted)[name = string("op_1454")]; tensor variance_29_axes_0 = const()[name = string("variance_29_axes_0"), val = tensor([-1])]; bool variance_29_keep_dims_0 = const()[name = string("variance_29_keep_dims_0"), val = bool(true)]; tensor variance_29 = reduce_mean(axes = variance_29_axes_0, keep_dims = variance_29_keep_dims_0, x = var_1454)[name = string("variance_29")]; fp32 var_1457 = const()[name = string("op_1457"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1458 = add(x = variance_29, y = var_1457)[name = string("op_1458")]; fp32 var_1459_epsilon_0 = const()[name = string("op_1459_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1459 = rsqrt(epsilon = var_1459_epsilon_0, x = var_1458)[name = string("op_1459")]; tensor hidden_states_89 = mul(x = hidden_states_85, y = var_1459)[name = string("hidden_states_89")]; tensor const_33 = const()[name = string("const_33"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110216384)))]; tensor k_19 = mul(x = const_33, y = hidden_states_89)[name = string("k_19")]; tensor q_21_perm_0 = const()[name = string("q_21_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_21_perm_0 = const()[name = string("k_21_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_15_perm_0 = const()[name = string("v_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_21 = transpose(perm = q_21_perm_0, x = q_19)[name = string("transpose_7")]; tensor var_1476 = mul(x = q_21, y = cos_r_1)[name = string("op_1476")]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_21)[name = string("x1_13")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_21)[name = string("x2_13")]; fp32 const_36_promoted = const()[name = string("const_36_promoted"), val = fp32(-0x1p+0)]; tensor var_1497 = mul(x = x2_13, y = const_36_promoted)[name = string("op_1497")]; int32 var_1499 = const()[name = string("op_1499"), val = int32(-1)]; bool var_1500_interleave_0 = const()[name = string("op_1500_interleave_0"), val = bool(false)]; tensor var_1500 = concat(axis = var_1499, interleave = var_1500_interleave_0, values = (var_1497, x1_13))[name = string("op_1500")]; tensor var_1501 = mul(x = var_1500, y = sin_r_1)[name = string("op_1501")]; tensor q_23 = add(x = var_1476, y = var_1501)[name = string("q_23")]; tensor k_21 = transpose(perm = k_21_perm_0, x = k_19)[name = string("transpose_6")]; tensor var_1504 = mul(x = k_21, y = cos_r_1)[name = string("op_1504")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_21)[name = string("x1_15")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_21)[name = string("x2_15")]; fp32 const_39_promoted = const()[name = string("const_39_promoted"), val = fp32(-0x1p+0)]; tensor var_1525 = mul(x = x2_15, y = const_39_promoted)[name = string("op_1525")]; int32 var_1527 = const()[name = string("op_1527"), val = int32(-1)]; bool var_1528_interleave_0 = const()[name = string("op_1528_interleave_0"), val = bool(false)]; tensor var_1528 = concat(axis = var_1527, interleave = var_1528_interleave_0, values = (var_1525, x1_15))[name = string("op_1528")]; tensor var_1529 = mul(x = var_1528, y = sin_r_1)[name = string("op_1529")]; tensor k_23 = add(x = var_1504, y = var_1529)[name = string("k_23")]; tensor var_1536 = const()[name = string("op_1536"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_7 = reshape(shape = var_1536, x = k_23)[name = string("nk_flat_7")]; tensor var_1542 = const()[name = string("op_1542"), val = tensor([1, 1024, 1, 1])]; tensor v_15 = transpose(perm = v_15_perm_0, x = v_13)[name = string("transpose_5")]; tensor nv_flat_7 = reshape(shape = var_1542, x = v_15)[name = string("nv_flat_7")]; tensor var_1551 = mul(x = var_1345, y = var_464)[name = string("op_1551")]; tensor var_1552 = mul(x = nk_flat_7, y = update_mask_1)[name = string("op_1552")]; tensor key_cache_17 = add(x = var_1551, y = var_1552)[name = string("key_cache_17")]; tensor var_1558 = mul(x = var_1365, y = var_464)[name = string("op_1558")]; tensor var_1559 = mul(x = nv_flat_7, y = update_mask_1)[name = string("op_1559")]; tensor value_cache_17 = add(x = var_1558, y = var_1559)[name = string("value_cache_17")]; tensor var_1563_axes_0 = const()[name = string("op_1563_axes_0"), val = tensor([2])]; tensor var_1563 = squeeze(axes = var_1563_axes_0, x = key_cache_17)[name = string("op_1563")]; tensor var_1568 = const()[name = string("op_1568"), val = tensor([1, 8, 128, 16])]; tensor kc_13 = reshape(shape = var_1568, x = var_1563)[name = string("kc_13")]; tensor var_1571_axes_0 = const()[name = string("op_1571_axes_0"), val = tensor([2])]; tensor var_1571 = squeeze(axes = var_1571_axes_0, x = value_cache_17)[name = string("op_1571")]; tensor var_1576 = const()[name = string("op_1576"), val = tensor([1, 8, 128, 16])]; tensor vc_13 = reshape(shape = var_1576, x = var_1571)[name = string("vc_13")]; tensor var_1579_axes_0 = const()[name = string("op_1579_axes_0"), val = tensor([2])]; tensor var_1579 = expand_dims(axes = var_1579_axes_0, x = kc_13)[name = string("op_1579")]; tensor var_1587_reps_0 = const()[name = string("op_1587_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1587 = tile(reps = var_1587_reps_0, x = var_1579)[name = string("op_1587")]; tensor var_1592 = const()[name = string("op_1592"), val = tensor([1, 16, 128, 16])]; tensor kc_15 = reshape(shape = var_1592, x = var_1587)[name = string("kc_15")]; tensor var_1595_axes_0 = const()[name = string("op_1595_axes_0"), val = tensor([2])]; tensor var_1595 = expand_dims(axes = var_1595_axes_0, x = vc_13)[name = string("op_1595")]; tensor var_1603_reps_0 = const()[name = string("op_1603_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1603 = tile(reps = var_1603_reps_0, x = var_1595)[name = string("op_1603")]; tensor var_1608 = const()[name = string("op_1608"), val = tensor([1, 16, 128, 16])]; tensor vc_15 = reshape(shape = var_1608, x = var_1603)[name = string("vc_15")]; bool var_1610_transpose_x_0 = const()[name = string("op_1610_transpose_x_0"), val = bool(false)]; bool var_1610_transpose_y_0 = const()[name = string("op_1610_transpose_y_0"), val = bool(false)]; tensor var_1610 = matmul(transpose_x = var_1610_transpose_x_0, transpose_y = var_1610_transpose_y_0, x = q_23, y = kc_15)[name = string("op_1610")]; fp32 _inversed_attn_weights_25_y_0 = const()[name = string("_inversed_attn_weights_25_y_0"), val = fp32(0x1.6a09e6p-4)]; tensor _inversed_attn_weights_25 = mul(x = var_1610, y = _inversed_attn_weights_25_y_0)[name = string("_inversed_attn_weights_25")]; tensor attn_weights_27 = add(x = _inversed_attn_weights_25, y = mask_1)[name = string("attn_weights_27")]; int32 var_1624 = const()[name = string("op_1624"), val = int32(-1)]; tensor attn_weights_31 = softmax(axis = var_1624, x = attn_weights_27)[name = string("attn_weights_31")]; bool attn_output_13_transpose_x_1 = const()[name = string("attn_output_13_transpose_x_1"), val = bool(false)]; bool attn_output_13_transpose_y_1 = const()[name = string("attn_output_13_transpose_y_1"), val = bool(true)]; tensor attn_output_13 = matmul(transpose_x = attn_output_13_transpose_x_1, transpose_y = attn_output_13_transpose_y_1, x = attn_weights_31, y = vc_15)[name = string("attn_output_13")]; tensor var_1633_perm_0 = const()[name = string("op_1633_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1637 = const()[name = string("op_1637"), val = tensor([1, 1, -1])]; tensor var_1633 = transpose(perm = var_1633_perm_0, x = attn_output_13)[name = string("transpose_4")]; tensor input_33 = reshape(shape = var_1637, x = var_1633)[name = string("input_33")]; tensor attn_output_15 = linear(bias = linear_1_bias_0, weight = layers_3_self_attn_o_proj_weight_palettized, x = input_33)[name = string("linear_24")]; tensor var_1643_axes_0 = const()[name = string("op_1643_axes_0"), val = tensor([0])]; tensor var_1643 = squeeze(axes = var_1643_axes_0, x = attn_output_15)[name = string("op_1643")]; tensor var_1645_axes_0 = const()[name = string("op_1645_axes_0"), val = tensor([0])]; tensor var_1645 = squeeze(axes = var_1645_axes_0, x = var_1643)[name = string("op_1645")]; tensor var_1647_axes_0 = const()[name = string("op_1647_axes_0"), val = tensor([-1])]; tensor var_1647 = expand_dims(axes = var_1647_axes_0, x = var_1645)[name = string("op_1647")]; tensor attn_4d_7_axes_0 = const()[name = string("attn_4d_7_axes_0"), val = tensor([-1])]; tensor attn_4d_7 = expand_dims(axes = attn_4d_7_axes_0, x = var_1647)[name = string("attn_4d_7")]; tensor hidden_13 = add(x = hidden_11, y = attn_4d_7)[name = string("hidden_13")]; tensor var_1653_axes_0 = const()[name = string("op_1653_axes_0"), val = tensor([-1])]; tensor var_1653 = squeeze(axes = var_1653_axes_0, x = hidden_13)[name = string("op_1653")]; tensor var_1655_axes_0 = const()[name = string("op_1655_axes_0"), val = tensor([-1])]; tensor var_1655 = squeeze(axes = var_1655_axes_0, x = var_1653)[name = string("op_1655")]; tensor hidden_states_91_axes_0 = const()[name = string("hidden_states_91_axes_0"), val = tensor([0])]; tensor hidden_states_91 = expand_dims(axes = hidden_states_91_axes_0, x = var_1655)[name = string("hidden_states_91")]; fp32 var_1661_promoted = const()[name = string("op_1661_promoted"), val = fp32(0x1p+1)]; tensor var_1667 = pow(x = hidden_states_91, y = var_1661_promoted)[name = string("op_1667")]; tensor variance_31_axes_0 = const()[name = string("variance_31_axes_0"), val = tensor([-1])]; bool variance_31_keep_dims_0 = const()[name = string("variance_31_keep_dims_0"), val = bool(true)]; tensor variance_31 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = var_1667)[name = string("variance_31")]; fp32 var_1670 = const()[name = string("op_1670"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1671 = add(x = variance_31, y = var_1670)[name = string("op_1671")]; fp32 var_1672_epsilon_0 = const()[name = string("op_1672_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1672 = rsqrt(epsilon = var_1672_epsilon_0, x = var_1671)[name = string("op_1672")]; tensor hidden_states_95 = mul(x = hidden_states_91, y = var_1672)[name = string("hidden_states_95")]; tensor const_40 = const()[name = string("const_40"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110216960)))]; tensor input_35 = mul(x = const_40, y = hidden_states_95)[name = string("input_35")]; tensor input_37 = linear(bias = linear_4_bias_0, weight = layers_3_mlp_gate_proj_weight_palettized, x = input_35)[name = string("linear_25")]; tensor var_1682 = silu(x = input_37)[name = string("op_1682")]; tensor var_1684 = linear(bias = linear_4_bias_0, weight = layers_3_mlp_up_proj_weight_palettized, x = input_35)[name = string("linear_26")]; tensor input_39 = mul(x = var_1682, y = var_1684)[name = string("input_39")]; tensor mlp_out_7 = linear(bias = linear_1_bias_0, weight = layers_3_mlp_down_proj_weight_palettized, x = input_39)[name = string("linear_27")]; tensor var_1689_axes_0 = const()[name = string("op_1689_axes_0"), val = tensor([0])]; tensor var_1689 = squeeze(axes = var_1689_axes_0, x = mlp_out_7)[name = string("op_1689")]; tensor var_1691_axes_0 = const()[name = string("op_1691_axes_0"), val = tensor([0])]; tensor var_1691 = squeeze(axes = var_1691_axes_0, x = var_1689)[name = string("op_1691")]; tensor var_1693_axes_0 = const()[name = string("op_1693_axes_0"), val = tensor([-1])]; tensor var_1693 = expand_dims(axes = var_1693_axes_0, x = var_1691)[name = string("op_1693")]; tensor mlp_4d_7_axes_0 = const()[name = string("mlp_4d_7_axes_0"), val = tensor([-1])]; tensor mlp_4d_7 = expand_dims(axes = mlp_4d_7_axes_0, x = var_1693)[name = string("mlp_4d_7")]; tensor hidden_15 = add(x = hidden_13, y = mlp_4d_7)[name = string("hidden_15")]; tensor var_1707_begin_0 = const()[name = string("op_1707_begin_0"), val = tensor([0, 4096, 0, 0])]; tensor var_1707_end_0 = const()[name = string("op_1707_end_0"), val = tensor([1, 1, 1, 16])]; tensor var_1707_end_mask_0 = const()[name = string("op_1707_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1707 = slice_by_index(begin = var_1707_begin_0, end = var_1707_end_0, end_mask = var_1707_end_mask_0, x = cast_1)[name = string("op_1707")]; tensor var_1727_begin_0 = const()[name = string("op_1727_begin_0"), val = tensor([0, 4096, 0, 0])]; tensor var_1727_end_0 = const()[name = string("op_1727_end_0"), val = tensor([1, 1, 1, 16])]; tensor var_1727_end_mask_0 = const()[name = string("op_1727_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1727 = slice_by_index(begin = var_1727_begin_0, end = var_1727_end_0, end_mask = var_1727_end_mask_0, x = cast_4)[name = string("op_1727")]; tensor var_1739_axes_0 = const()[name = string("op_1739_axes_0"), val = tensor([-1])]; tensor var_1739 = squeeze(axes = var_1739_axes_0, x = hidden_15)[name = string("op_1739")]; tensor var_1741_axes_0 = const()[name = string("op_1741_axes_0"), val = tensor([-1])]; tensor var_1741 = squeeze(axes = var_1741_axes_0, x = var_1739)[name = string("op_1741")]; tensor hidden_states_97_axes_0 = const()[name = string("hidden_states_97_axes_0"), val = tensor([0])]; tensor hidden_states_97 = expand_dims(axes = hidden_states_97_axes_0, x = var_1741)[name = string("hidden_states_97")]; fp32 var_1747_promoted = const()[name = string("op_1747_promoted"), val = fp32(0x1p+1)]; tensor var_1753 = pow(x = hidden_states_97, y = var_1747_promoted)[name = string("op_1753")]; tensor variance_33_axes_0 = const()[name = string("variance_33_axes_0"), val = tensor([-1])]; bool variance_33_keep_dims_0 = const()[name = string("variance_33_keep_dims_0"), val = bool(true)]; tensor variance_33 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = var_1753)[name = string("variance_33")]; fp32 var_1756 = const()[name = string("op_1756"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1757 = add(x = variance_33, y = var_1756)[name = string("op_1757")]; fp32 var_1758_epsilon_0 = const()[name = string("op_1758_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1758 = rsqrt(epsilon = var_1758_epsilon_0, x = var_1757)[name = string("op_1758")]; tensor hidden_states_101 = mul(x = hidden_states_97, y = var_1758)[name = string("hidden_states_101")]; tensor const_41 = const()[name = string("const_41"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110221120)))]; tensor input_41 = mul(x = const_41, y = hidden_states_101)[name = string("input_41")]; tensor var_1764 = linear(bias = linear_0_bias_0, weight = layers_4_self_attn_q_proj_weight_palettized, x = input_41)[name = string("linear_28")]; tensor var_1769 = const()[name = string("op_1769"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_103 = reshape(shape = var_1769, x = var_1764)[name = string("hidden_states_103")]; tensor var_1773 = linear(bias = linear_1_bias_0, weight = layers_4_self_attn_k_proj_weight_palettized, x = input_41)[name = string("linear_29")]; tensor var_1778 = const()[name = string("op_1778"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_109 = reshape(shape = var_1778, x = var_1773)[name = string("hidden_states_109")]; tensor var_1782 = linear(bias = linear_1_bias_0, weight = layers_4_self_attn_v_proj_weight_palettized, x = input_41)[name = string("linear_30")]; tensor var_1787 = const()[name = string("op_1787"), val = tensor([1, 1, 8, 128])]; tensor v_17 = reshape(shape = var_1787, x = var_1782)[name = string("v_17")]; fp32 var_1792_promoted = const()[name = string("op_1792_promoted"), val = fp32(0x1p+1)]; tensor var_1798 = pow(x = hidden_states_103, y = var_1792_promoted)[name = string("op_1798")]; tensor variance_35_axes_0 = const()[name = string("variance_35_axes_0"), val = tensor([-1])]; bool variance_35_keep_dims_0 = const()[name = string("variance_35_keep_dims_0"), val = bool(true)]; tensor variance_35 = reduce_mean(axes = variance_35_axes_0, keep_dims = variance_35_keep_dims_0, x = var_1798)[name = string("variance_35")]; fp32 var_1801 = const()[name = string("op_1801"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1802 = add(x = variance_35, y = var_1801)[name = string("op_1802")]; fp32 var_1803_epsilon_0 = const()[name = string("op_1803_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1803 = rsqrt(epsilon = var_1803_epsilon_0, x = var_1802)[name = string("op_1803")]; tensor hidden_states_107 = mul(x = hidden_states_103, y = var_1803)[name = string("hidden_states_107")]; tensor const_42 = const()[name = string("const_42"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110225280)))]; tensor q_25 = mul(x = const_42, y = hidden_states_107)[name = string("q_25")]; fp32 var_1810_promoted = const()[name = string("op_1810_promoted"), val = fp32(0x1p+1)]; tensor var_1816 = pow(x = hidden_states_109, y = var_1810_promoted)[name = string("op_1816")]; tensor variance_37_axes_0 = const()[name = string("variance_37_axes_0"), val = tensor([-1])]; bool variance_37_keep_dims_0 = const()[name = string("variance_37_keep_dims_0"), val = bool(true)]; tensor variance_37 = reduce_mean(axes = variance_37_axes_0, keep_dims = variance_37_keep_dims_0, x = var_1816)[name = string("variance_37")]; fp32 var_1819 = const()[name = string("op_1819"), val = fp32(0x1.0c6f7ap-20)]; tensor var_1820 = add(x = variance_37, y = var_1819)[name = string("op_1820")]; fp32 var_1821_epsilon_0 = const()[name = string("op_1821_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1821 = rsqrt(epsilon = var_1821_epsilon_0, x = var_1820)[name = string("op_1821")]; tensor hidden_states_113 = mul(x = hidden_states_109, y = var_1821)[name = string("hidden_states_113")]; tensor const_43 = const()[name = string("const_43"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110225856)))]; tensor k_25 = mul(x = const_43, y = hidden_states_113)[name = string("k_25")]; tensor q_27_perm_0 = const()[name = string("q_27_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_27_perm_0 = const()[name = string("k_27_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_perm_0 = const()[name = string("v_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_27 = transpose(perm = q_27_perm_0, x = q_25)[name = string("transpose_3")]; tensor var_1838 = mul(x = q_27, y = cos_r_1)[name = string("op_1838")]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_27)[name = string("x1_17")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_27)[name = string("x2_17")]; fp32 const_46_promoted = const()[name = string("const_46_promoted"), val = fp32(-0x1p+0)]; tensor var_1859 = mul(x = x2_17, y = const_46_promoted)[name = string("op_1859")]; int32 var_1861 = const()[name = string("op_1861"), val = int32(-1)]; bool var_1862_interleave_0 = const()[name = string("op_1862_interleave_0"), val = bool(false)]; tensor var_1862 = concat(axis = var_1861, interleave = var_1862_interleave_0, values = (var_1859, x1_17))[name = string("op_1862")]; tensor var_1863 = mul(x = var_1862, y = sin_r_1)[name = string("op_1863")]; tensor q = add(x = var_1838, y = var_1863)[name = string("q")]; tensor k_27 = transpose(perm = k_27_perm_0, x = k_25)[name = string("transpose_2")]; tensor var_1866 = mul(x = k_27, y = cos_r_1)[name = string("op_1866")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_27)[name = string("x1")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_27)[name = string("x2")]; fp32 const_49_promoted = const()[name = string("const_49_promoted"), val = fp32(-0x1p+0)]; tensor var_1887 = mul(x = x2, y = const_49_promoted)[name = string("op_1887")]; int32 var_1889 = const()[name = string("op_1889"), val = int32(-1)]; bool var_1890_interleave_0 = const()[name = string("op_1890_interleave_0"), val = bool(false)]; tensor var_1890 = concat(axis = var_1889, interleave = var_1890_interleave_0, values = (var_1887, x1))[name = string("op_1890")]; tensor var_1891 = mul(x = var_1890, y = sin_r_1)[name = string("op_1891")]; tensor k = add(x = var_1866, y = var_1891)[name = string("k")]; tensor var_1898 = const()[name = string("op_1898"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat = reshape(shape = var_1898, x = k)[name = string("nk_flat")]; tensor var_1904 = const()[name = string("op_1904"), val = tensor([1, 1024, 1, 1])]; tensor v = transpose(perm = v_perm_0, x = v_17)[name = string("transpose_1")]; tensor nv_flat = reshape(shape = var_1904, x = v)[name = string("nv_flat")]; tensor var_1913 = mul(x = var_1707, y = var_464)[name = string("op_1913")]; tensor var_1914 = mul(x = nk_flat, y = update_mask_1)[name = string("op_1914")]; tensor key_cache_1 = add(x = var_1913, y = var_1914)[name = string("key_cache")]; tensor var_1920 = mul(x = var_1727, y = var_464)[name = string("op_1920")]; tensor var_1921 = mul(x = nv_flat, y = update_mask_1)[name = string("op_1921")]; tensor value_cache_1 = add(x = var_1920, y = var_1921)[name = string("value_cache")]; tensor var_1925_axes_0 = const()[name = string("op_1925_axes_0"), val = tensor([2])]; tensor var_1925 = squeeze(axes = var_1925_axes_0, x = key_cache_1)[name = string("op_1925")]; tensor var_1930 = const()[name = string("op_1930"), val = tensor([1, 8, 128, 16])]; tensor kc_17 = reshape(shape = var_1930, x = var_1925)[name = string("kc_17")]; tensor var_1933_axes_0 = const()[name = string("op_1933_axes_0"), val = tensor([2])]; tensor var_1933 = squeeze(axes = var_1933_axes_0, x = value_cache_1)[name = string("op_1933")]; tensor var_1938 = const()[name = string("op_1938"), val = tensor([1, 8, 128, 16])]; tensor vc_17 = reshape(shape = var_1938, x = var_1933)[name = string("vc_17")]; tensor var_1941_axes_0 = const()[name = string("op_1941_axes_0"), val = tensor([2])]; tensor var_1941 = expand_dims(axes = var_1941_axes_0, x = kc_17)[name = string("op_1941")]; tensor var_1949_reps_0 = const()[name = string("op_1949_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1949 = tile(reps = var_1949_reps_0, x = var_1941)[name = string("op_1949")]; tensor var_1954 = const()[name = string("op_1954"), val = tensor([1, 16, 128, 16])]; tensor kc = reshape(shape = var_1954, x = var_1949)[name = string("kc")]; tensor var_1957_axes_0 = const()[name = string("op_1957_axes_0"), val = tensor([2])]; tensor var_1957 = expand_dims(axes = var_1957_axes_0, x = vc_17)[name = string("op_1957")]; tensor var_1965_reps_0 = const()[name = string("op_1965_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1965 = tile(reps = var_1965_reps_0, x = var_1957)[name = string("op_1965")]; tensor var_1970 = const()[name = string("op_1970"), val = tensor([1, 16, 128, 16])]; tensor vc = reshape(shape = var_1970, x = var_1965)[name = string("vc")]; bool var_1972_transpose_x_0 = const()[name = string("op_1972_transpose_x_0"), val = bool(false)]; bool var_1972_transpose_y_0 = const()[name = string("op_1972_transpose_y_0"), val = bool(false)]; tensor var_1972 = matmul(transpose_x = var_1972_transpose_x_0, transpose_y = var_1972_transpose_y_0, x = q, y = kc)[name = string("op_1972")]; fp32 _inversed_attn_weights_33_y_0 = const()[name = string("_inversed_attn_weights_33_y_0"), val = fp32(0x1.6a09e6p-4)]; tensor _inversed_attn_weights_33 = mul(x = var_1972, y = _inversed_attn_weights_33_y_0)[name = string("_inversed_attn_weights_33")]; tensor attn_weights_35 = add(x = _inversed_attn_weights_33, y = mask_1)[name = string("attn_weights_35")]; int32 var_1986 = const()[name = string("op_1986"), val = int32(-1)]; tensor attn_weights = softmax(axis = var_1986, x = attn_weights_35)[name = string("attn_weights")]; bool attn_output_17_transpose_x_1 = const()[name = string("attn_output_17_transpose_x_1"), val = bool(false)]; bool attn_output_17_transpose_y_1 = const()[name = string("attn_output_17_transpose_y_1"), val = bool(true)]; tensor attn_output_17 = matmul(transpose_x = attn_output_17_transpose_x_1, transpose_y = attn_output_17_transpose_y_1, x = attn_weights, y = vc)[name = string("attn_output_17")]; tensor var_1995_perm_0 = const()[name = string("op_1995_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1999 = const()[name = string("op_1999"), val = tensor([1, 1, -1])]; tensor var_1995 = transpose(perm = var_1995_perm_0, x = attn_output_17)[name = string("transpose_0")]; tensor input_43 = reshape(shape = var_1999, x = var_1995)[name = string("input_43")]; tensor attn_output = linear(bias = linear_1_bias_0, weight = layers_4_self_attn_o_proj_weight_palettized, x = input_43)[name = string("linear_31")]; tensor var_2005_axes_0 = const()[name = string("op_2005_axes_0"), val = tensor([0])]; tensor var_2005 = squeeze(axes = var_2005_axes_0, x = attn_output)[name = string("op_2005")]; tensor var_2007_axes_0 = const()[name = string("op_2007_axes_0"), val = tensor([0])]; tensor var_2007 = squeeze(axes = var_2007_axes_0, x = var_2005)[name = string("op_2007")]; tensor var_2009_axes_0 = const()[name = string("op_2009_axes_0"), val = tensor([-1])]; tensor var_2009 = expand_dims(axes = var_2009_axes_0, x = var_2007)[name = string("op_2009")]; tensor attn_4d_axes_0 = const()[name = string("attn_4d_axes_0"), val = tensor([-1])]; tensor attn_4d = expand_dims(axes = attn_4d_axes_0, x = var_2009)[name = string("attn_4d")]; tensor hidden_17 = add(x = hidden_15, y = attn_4d)[name = string("hidden_17")]; tensor var_2015_axes_0 = const()[name = string("op_2015_axes_0"), val = tensor([-1])]; tensor var_2015 = squeeze(axes = var_2015_axes_0, x = hidden_17)[name = string("op_2015")]; tensor var_2017_axes_0 = const()[name = string("op_2017_axes_0"), val = tensor([-1])]; tensor var_2017 = squeeze(axes = var_2017_axes_0, x = var_2015)[name = string("op_2017")]; tensor hidden_states_115_axes_0 = const()[name = string("hidden_states_115_axes_0"), val = tensor([0])]; tensor hidden_states_115 = expand_dims(axes = hidden_states_115_axes_0, x = var_2017)[name = string("hidden_states_115")]; fp32 var_2023_promoted = const()[name = string("op_2023_promoted"), val = fp32(0x1p+1)]; tensor var_2029 = pow(x = hidden_states_115, y = var_2023_promoted)[name = string("op_2029")]; tensor variance_39_axes_0 = const()[name = string("variance_39_axes_0"), val = tensor([-1])]; bool variance_39_keep_dims_0 = const()[name = string("variance_39_keep_dims_0"), val = bool(true)]; tensor variance_39 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = var_2029)[name = string("variance_39")]; fp32 var_2032 = const()[name = string("op_2032"), val = fp32(0x1.0c6f7ap-20)]; tensor var_2033 = add(x = variance_39, y = var_2032)[name = string("op_2033")]; fp32 var_2034_epsilon_0 = const()[name = string("op_2034_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2034 = rsqrt(epsilon = var_2034_epsilon_0, x = var_2033)[name = string("op_2034")]; tensor hidden_states_119 = mul(x = hidden_states_115, y = var_2034)[name = string("hidden_states_119")]; tensor const_50 = const()[name = string("const_50"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110226432)))]; tensor input_45 = mul(x = const_50, y = hidden_states_119)[name = string("input_45")]; tensor input_47 = linear(bias = linear_4_bias_0, weight = layers_4_mlp_gate_proj_weight_palettized, x = input_45)[name = string("linear_32")]; tensor var_2044 = silu(x = input_47)[name = string("op_2044")]; tensor var_2046 = linear(bias = linear_4_bias_0, weight = layers_4_mlp_up_proj_weight_palettized, x = input_45)[name = string("linear_33")]; tensor input_49 = mul(x = var_2044, y = var_2046)[name = string("input_49")]; tensor mlp_out = linear(bias = linear_1_bias_0, weight = layers_4_mlp_down_proj_weight_palettized, x = input_49)[name = string("linear_34")]; tensor var_2051_axes_0 = const()[name = string("op_2051_axes_0"), val = tensor([0])]; tensor var_2051 = squeeze(axes = var_2051_axes_0, x = mlp_out)[name = string("op_2051")]; tensor var_2053_axes_0 = const()[name = string("op_2053_axes_0"), val = tensor([0])]; tensor var_2053 = squeeze(axes = var_2053_axes_0, x = var_2051)[name = string("op_2053")]; tensor var_2055_axes_0 = const()[name = string("op_2055_axes_0"), val = tensor([-1])]; tensor var_2055 = expand_dims(axes = var_2055_axes_0, x = var_2053)[name = string("op_2055")]; tensor mlp_4d_axes_0 = const()[name = string("mlp_4d_axes_0"), val = tensor([-1])]; tensor mlp_4d = expand_dims(axes = mlp_4d_axes_0, x = var_2055)[name = string("mlp_4d")]; tensor hidden_states_type_fp32 = add(x = hidden_17, y = mlp_4d)[name = string("hidden")]; tensor var_2061_axes_0 = const()[name = string("op_2061_axes_0"), val = tensor([-1])]; tensor var_2061 = squeeze(axes = var_2061_axes_0, x = hidden_states_type_fp32)[name = string("op_2061")]; tensor var_2063_axes_0 = const()[name = string("op_2063_axes_0"), val = tensor([-1])]; tensor var_2063 = squeeze(axes = var_2063_axes_0, x = var_2061)[name = string("op_2063")]; tensor hidden_states_121_axes_0 = const()[name = string("hidden_states_121_axes_0"), val = tensor([0])]; tensor hidden_states_121 = expand_dims(axes = hidden_states_121_axes_0, x = var_2063)[name = string("hidden_states_121")]; fp32 var_2069_promoted = const()[name = string("op_2069_promoted"), val = fp32(0x1p+1)]; tensor var_2075 = pow(x = hidden_states_121, y = var_2069_promoted)[name = string("op_2075")]; tensor variance_axes_0 = const()[name = string("variance_axes_0"), val = tensor([-1])]; bool variance_keep_dims_0 = const()[name = string("variance_keep_dims_0"), val = bool(true)]; tensor variance = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = var_2075)[name = string("variance")]; fp32 var_2078 = const()[name = string("op_2078"), val = fp32(0x1.0c6f7ap-20)]; tensor var_2079 = add(x = variance, y = var_2078)[name = string("op_2079")]; fp32 var_2080_epsilon_0 = const()[name = string("op_2080_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2080 = rsqrt(epsilon = var_2080_epsilon_0, x = var_2079)[name = string("op_2080")]; tensor hidden_states_1_1 = mul(x = hidden_states_121, y = var_2080)[name = string("hidden_states")]; tensor const_51 = const()[name = string("const_51"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110230592)))]; tensor input = mul(x = const_51, y = hidden_states_1_1)[name = string("input")]; tensor logits_1 = linear(bias = linear_0_bias_0, weight = lm_heads_0_weight_palettized, x = input)[name = string("linear_35")]; tensor var_2088_axes_0 = const()[name = string("op_2088_axes_0"), val = tensor([1])]; tensor var_2088 = squeeze(axes = var_2088_axes_0, x = logits_1)[name = string("op_2088")]; tensor logits_3 = linear(bias = linear_0_bias_0, weight = lm_heads_1_weight_palettized, x = input)[name = string("linear_36")]; tensor var_2093_axes_0 = const()[name = string("op_2093_axes_0"), val = tensor([1])]; tensor var_2093 = squeeze(axes = var_2093_axes_0, x = logits_3)[name = string("op_2093")]; tensor logits_5 = linear(bias = linear_0_bias_0, weight = lm_heads_2_weight_palettized, x = input)[name = string("linear_37")]; tensor var_2098_axes_0 = const()[name = string("op_2098_axes_0"), val = tensor([1])]; tensor var_2098 = squeeze(axes = var_2098_axes_0, x = logits_5)[name = string("op_2098")]; tensor logits_7 = linear(bias = linear_0_bias_0, weight = lm_heads_3_weight_palettized, x = input)[name = string("linear_38")]; tensor var_2103_axes_0 = const()[name = string("op_2103_axes_0"), val = tensor([1])]; tensor var_2103 = squeeze(axes = var_2103_axes_0, x = logits_7)[name = string("op_2103")]; tensor logits_9 = linear(bias = linear_0_bias_0, weight = lm_heads_4_weight_palettized, x = input)[name = string("linear_39")]; tensor var_2108_axes_0 = const()[name = string("op_2108_axes_0"), val = tensor([1])]; tensor var_2108 = squeeze(axes = var_2108_axes_0, x = logits_9)[name = string("op_2108")]; tensor logits_11 = linear(bias = linear_0_bias_0, weight = lm_heads_5_weight_palettized, x = input)[name = string("linear_40")]; tensor var_2113_axes_0 = const()[name = string("op_2113_axes_0"), val = tensor([1])]; tensor var_2113 = squeeze(axes = var_2113_axes_0, x = logits_11)[name = string("op_2113")]; tensor logits_13 = linear(bias = linear_0_bias_0, weight = lm_heads_6_weight_palettized, x = input)[name = string("linear_41")]; tensor var_2118_axes_0 = const()[name = string("op_2118_axes_0"), val = tensor([1])]; tensor var_2118 = squeeze(axes = var_2118_axes_0, x = logits_13)[name = string("op_2118")]; tensor logits_15 = linear(bias = linear_0_bias_0, weight = lm_heads_7_weight_palettized, x = input)[name = string("linear_42")]; tensor var_2123_axes_0 = const()[name = string("op_2123_axes_0"), val = tensor([1])]; tensor var_2123 = squeeze(axes = var_2123_axes_0, x = logits_15)[name = string("op_2123")]; tensor logits_17 = linear(bias = linear_0_bias_0, weight = lm_heads_8_weight_palettized, x = input)[name = string("linear_43")]; tensor var_2128_axes_0 = const()[name = string("op_2128_axes_0"), val = tensor([1])]; tensor var_2128 = squeeze(axes = var_2128_axes_0, x = logits_17)[name = string("op_2128")]; tensor logits_19 = linear(bias = linear_0_bias_0, weight = lm_heads_9_weight_palettized, x = input)[name = string("linear_44")]; tensor var_2133_axes_0 = const()[name = string("op_2133_axes_0"), val = tensor([1])]; tensor var_2133 = squeeze(axes = var_2133_axes_0, x = logits_19)[name = string("op_2133")]; tensor logits_21 = linear(bias = linear_0_bias_0, weight = lm_heads_10_weight_palettized, x = input)[name = string("linear_45")]; tensor var_2138_axes_0 = const()[name = string("op_2138_axes_0"), val = tensor([1])]; tensor var_2138 = squeeze(axes = var_2138_axes_0, x = logits_21)[name = string("op_2138")]; tensor logits_23 = linear(bias = linear_0_bias_0, weight = lm_heads_11_weight_palettized, x = input)[name = string("linear_46")]; tensor var_2143_axes_0 = const()[name = string("op_2143_axes_0"), val = tensor([1])]; tensor var_2143 = squeeze(axes = var_2143_axes_0, x = logits_23)[name = string("op_2143")]; tensor logits_25 = linear(bias = linear_0_bias_0, weight = lm_heads_12_weight_palettized, x = input)[name = string("linear_47")]; tensor var_2148_axes_0 = const()[name = string("op_2148_axes_0"), val = tensor([1])]; tensor var_2148 = squeeze(axes = var_2148_axes_0, x = logits_25)[name = string("op_2148")]; tensor logits_27 = linear(bias = linear_0_bias_0, weight = lm_heads_13_weight_palettized, x = input)[name = string("linear_48")]; tensor var_2153_axes_0 = const()[name = string("op_2153_axes_0"), val = tensor([1])]; tensor var_2153 = squeeze(axes = var_2153_axes_0, x = logits_27)[name = string("op_2153")]; tensor logits = linear(bias = linear_0_bias_0, weight = lm_heads_14_weight_palettized, x = input)[name = string("linear_49")]; tensor var_2158_axes_0 = const()[name = string("op_2158_axes_0"), val = tensor([1])]; tensor var_2158 = squeeze(axes = var_2158_axes_0, x = logits)[name = string("op_2158")]; int32 var_2161_axis_0 = const()[name = string("op_2161_axis_0"), val = int32(1)]; tensor all_logits_type_fp32 = stack(axis = var_2161_axis_0, values = (var_2088, var_2093, var_2098, var_2103, var_2108, var_2113, var_2118, var_2123, var_2128, var_2133, var_2138, var_2143, var_2148, var_2153, var_2158))[name = string("op_2161")]; int32 var_2163 = const()[name = string("op_2163"), val = int32(1)]; bool new_kv_1_interleave_0 = const()[name = string("new_kv_1_interleave_0"), val = bool(false)]; tensor new_kv_1 = concat(axis = var_2163, interleave = new_kv_1_interleave_0, values = (nk_flat_1, nk_flat_3, nk_flat_5, nk_flat_7, nk_flat))[name = string("new_kv_1")]; tensor var_2172 = mul(x = cast_1, y = var_464)[name = string("op_2172")]; tensor var_2173 = mul(x = new_kv_1, y = update_mask_1)[name = string("op_2173")]; tensor new_key_cache_type_fp32 = add(x = var_2172, y = var_2173)[name = string("op_2175")]; int32 var_2177 = const()[name = string("op_2177"), val = int32(1)]; bool new_kv_interleave_0 = const()[name = string("new_kv_interleave_0"), val = bool(false)]; tensor new_kv = concat(axis = var_2177, interleave = new_kv_interleave_0, values = (nv_flat_1, nv_flat_3, nv_flat_5, nv_flat_7, nv_flat))[name = string("new_kv")]; tensor var_2186 = mul(x = cast_4, y = var_464)[name = string("op_2186")]; tensor var_2187 = mul(x = new_kv, y = update_mask_1)[name = string("op_2187")]; tensor new_value_cache_type_fp32 = add(x = var_2186, y = var_2187)[name = string("op_2189")]; string cast_65_dtype_0 = const()[name = string("cast_65_dtype_0"), val = string("fp16")]; string cast_66_dtype_0 = const()[name = string("cast_66_dtype_0"), val = string("fp16")]; string cast_67_dtype_0 = const()[name = string("cast_67_dtype_0"), val = string("fp16")]; string cast_68_dtype_0 = const()[name = string("cast_68_dtype_0"), val = string("fp16")]; tensor all_logits = cast(dtype = cast_65_dtype_0, x = all_logits_type_fp32)[name = string("cast_0")]; tensor hidden_states = cast(dtype = cast_66_dtype_0, x = hidden_states_type_fp32)[name = string("cast_1")]; tensor new_key_cache = cast(dtype = cast_67_dtype_0, x = new_key_cache_type_fp32)[name = string("cast_2")]; tensor new_value_cache = cast(dtype = cast_68_dtype_0, x = new_value_cache_type_fp32)[name = string("cast_3")]; } -> (all_logits, hidden_states, new_key_cache, new_value_cache); }