program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}})] { func main(tensor cache_length, tensor input_embeds, tensor key_cache, tensor key_padding_mask, tensor kv_cache_update_mask, tensor value_cache) { string cast_0_dtype_0 = const()[name = string("cast_0_dtype_0"), val = string("fp32")]; string cast_1_dtype_0 = const()[name = string("cast_1_dtype_0"), val = string("fp32")]; string cast_2_dtype_0 = const()[name = string("cast_2_dtype_0"), val = string("fp32")]; string cast_3_dtype_0 = const()[name = string("cast_3_dtype_0"), val = string("fp32")]; string cast_4_dtype_0 = const()[name = string("cast_4_dtype_0"), val = string("fp32")]; tensor layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2097280))))[name = string("layers_0_self_attn_q_proj_weight_palettized")]; tensor layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2098368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3147008))))[name = string("layers_0_self_attn_k_proj_weight_palettized")]; tensor layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3148096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4196736))))[name = string("layers_0_self_attn_v_proj_weight_palettized")]; tensor layers_0_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4197824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6295040))))[name = string("layers_0_self_attn_o_proj_weight_palettized")]; tensor layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6296128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9441920))))[name = string("layers_0_mlp_gate_proj_weight_palettized")]; tensor layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9443008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12588800))))[name = string("layers_0_mlp_up_proj_weight_palettized")]; tensor layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12589888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15735680))))[name = string("layers_0_mlp_down_proj_weight_palettized")]; tensor layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15736768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17833984))))[name = string("layers_1_self_attn_q_proj_weight_palettized")]; tensor layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17835072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18883712))))[name = string("layers_1_self_attn_k_proj_weight_palettized")]; tensor layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18884800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19933440))))[name = string("layers_1_self_attn_v_proj_weight_palettized")]; tensor layers_1_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19934528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22031744))))[name = string("layers_1_self_attn_o_proj_weight_palettized")]; tensor layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22032832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25178624))))[name = string("layers_1_mlp_gate_proj_weight_palettized")]; tensor layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25179712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28325504))))[name = string("layers_1_mlp_up_proj_weight_palettized")]; tensor layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28326592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31472384))))[name = string("layers_1_mlp_down_proj_weight_palettized")]; tensor layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31473472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33570688))))[name = string("layers_2_self_attn_q_proj_weight_palettized")]; tensor layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33571776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34620416))))[name = string("layers_2_self_attn_k_proj_weight_palettized")]; tensor layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34621504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35670144))))[name = string("layers_2_self_attn_v_proj_weight_palettized")]; tensor layers_2_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35671232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37768448))))[name = string("layers_2_self_attn_o_proj_weight_palettized")]; tensor layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37769536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40915328))))[name = string("layers_2_mlp_gate_proj_weight_palettized")]; tensor layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40916416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44062208))))[name = string("layers_2_mlp_up_proj_weight_palettized")]; tensor layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44063296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47209088))))[name = string("layers_2_mlp_down_proj_weight_palettized")]; tensor layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47210176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49307392))))[name = string("layers_3_self_attn_q_proj_weight_palettized")]; tensor layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49308480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50357120))))[name = string("layers_3_self_attn_k_proj_weight_palettized")]; tensor layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50358208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51406848))))[name = string("layers_3_self_attn_v_proj_weight_palettized")]; tensor layers_3_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51407936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53505152))))[name = string("layers_3_self_attn_o_proj_weight_palettized")]; tensor layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53506240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56652032))))[name = string("layers_3_mlp_gate_proj_weight_palettized")]; tensor layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56653120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59798912))))[name = string("layers_3_mlp_up_proj_weight_palettized")]; tensor layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59800000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62945792))))[name = string("layers_3_mlp_down_proj_weight_palettized")]; tensor layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62946880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65044096))))[name = string("layers_4_self_attn_q_proj_weight_palettized")]; tensor layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65045184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66093824))))[name = string("layers_4_self_attn_k_proj_weight_palettized")]; tensor layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66094912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67143552))))[name = string("layers_4_self_attn_v_proj_weight_palettized")]; tensor layers_4_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67144640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69241856))))[name = string("layers_4_self_attn_o_proj_weight_palettized")]; tensor layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69242944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72388736))))[name = string("layers_4_mlp_gate_proj_weight_palettized")]; tensor layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72389824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75535616))))[name = string("layers_4_mlp_up_proj_weight_palettized")]; tensor layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75536704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78682496))))[name = string("layers_4_mlp_down_proj_weight_palettized")]; tensor lm_heads_0_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78683584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80780800))))[name = string("lm_heads_0_weight_palettized")]; tensor lm_heads_1_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80781888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82879104))))[name = string("lm_heads_1_weight_palettized")]; tensor lm_heads_2_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82880192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84977408))))[name = string("lm_heads_2_weight_palettized")]; tensor lm_heads_3_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84978496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87075712))))[name = string("lm_heads_3_weight_palettized")]; tensor lm_heads_4_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87076800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89174016))))[name = string("lm_heads_4_weight_palettized")]; tensor lm_heads_5_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89175104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91272320))))[name = string("lm_heads_5_weight_palettized")]; tensor lm_heads_6_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91273408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93370624))))[name = string("lm_heads_6_weight_palettized")]; tensor lm_heads_7_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93371712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95468928))))[name = string("lm_heads_7_weight_palettized")]; tensor lm_heads_8_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95470016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97567232))))[name = string("lm_heads_8_weight_palettized")]; tensor lm_heads_9_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97568320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99665536))))[name = string("lm_heads_9_weight_palettized")]; tensor lm_heads_10_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99666624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101763840))))[name = string("lm_heads_10_weight_palettized")]; tensor lm_heads_11_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101764928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103862144))))[name = string("lm_heads_11_weight_palettized")]; tensor lm_heads_12_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103863232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105960448))))[name = string("lm_heads_12_weight_palettized")]; tensor lm_heads_13_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105961536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108058752))))[name = string("lm_heads_13_weight_palettized")]; tensor lm_heads_14_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108059840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110157056))))[name = string("lm_heads_14_weight_palettized")]; tensor var_205_axes_0 = const()[name = string("op_205_axes_0"), val = tensor([0])]; tensor var_205 = expand_dims(axes = var_205_axes_0, x = cache_length)[name = string("op_205")]; string position_ids_dtype_0 = const()[name = string("position_ids_dtype_0"), val = string("fp32")]; tensor const_0 = const()[name = string("const_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110158144)))]; tensor var_226_axes_0 = const()[name = string("op_226_axes_0"), val = tensor([-1])]; tensor position_ids = cast(dtype = position_ids_dtype_0, x = var_205)[name = string("cast_9")]; tensor var_226 = expand_dims(axes = var_226_axes_0, x = position_ids)[name = string("op_226")]; bool var_227_transpose_x_0 = const()[name = string("op_227_transpose_x_0"), val = bool(false)]; bool var_227_transpose_y_0 = const()[name = string("op_227_transpose_y_0"), val = bool(false)]; tensor var_227 = matmul(transpose_x = var_227_transpose_x_0, transpose_y = var_227_transpose_y_0, x = const_0, y = var_226)[name = string("op_227")]; tensor freqs_perm_0 = const()[name = string("freqs_perm_0"), val = tensor([0, 2, 1])]; int32 var_232 = const()[name = string("op_232"), val = int32(-1)]; bool emb_interleave_0 = const()[name = string("emb_interleave_0"), val = bool(false)]; tensor freqs = transpose(perm = freqs_perm_0, x = var_227)[name = string("transpose_20")]; tensor emb = concat(axis = var_232, interleave = emb_interleave_0, values = (freqs, freqs))[name = string("emb")]; tensor var_234 = cos(x = emb)[name = string("op_234")]; tensor var_242 = sin(x = emb)[name = string("op_242")]; tensor var_259_begin_0 = const()[name = string("op_259_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_259_end_0 = const()[name = string("op_259_end_0"), val = tensor([1, 1024, 1, 16])]; tensor var_259_end_mask_0 = const()[name = string("op_259_end_mask_0"), val = tensor([true, false, true, true])]; tensor cast_1 = cast(dtype = cast_1_dtype_0, x = key_cache)[name = string("cast_8")]; tensor var_259 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = cast_1)[name = string("op_259")]; tensor var_279_begin_0 = const()[name = string("op_279_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_279_end_0 = const()[name = string("op_279_end_0"), val = tensor([1, 1024, 1, 16])]; tensor var_279_end_mask_0 = const()[name = string("op_279_end_mask_0"), val = tensor([true, false, true, true])]; tensor cast_4 = cast(dtype = cast_4_dtype_0, x = value_cache)[name = string("cast_7")]; tensor var_279 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = cast_4)[name = string("op_279")]; tensor var_291_axes_0 = const()[name = string("op_291_axes_0"), val = tensor([-1])]; tensor cast_0 = cast(dtype = cast_0_dtype_0, x = input_embeds)[name = string("cast_6")]; tensor var_291 = squeeze(axes = var_291_axes_0, x = cast_0)[name = string("op_291")]; tensor var_293_axes_0 = const()[name = string("op_293_axes_0"), val = tensor([-1])]; tensor var_293 = squeeze(axes = var_293_axes_0, x = var_291)[name = string("op_293")]; tensor hidden_states_1_axes_0 = const()[name = string("hidden_states_1_axes_0"), val = tensor([0])]; tensor hidden_states_1 = expand_dims(axes = hidden_states_1_axes_0, x = var_293)[name = string("hidden_states_1")]; fp32 var_299_promoted = const()[name = string("op_299_promoted"), val = fp32(0x1p+1)]; tensor var_305 = pow(x = hidden_states_1, y = var_299_promoted)[name = string("op_305")]; tensor variance_1_axes_0 = const()[name = string("variance_1_axes_0"), val = tensor([-1])]; bool variance_1_keep_dims_0 = const()[name = string("variance_1_keep_dims_0"), val = bool(true)]; tensor variance_1 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = var_305)[name = string("variance_1")]; fp32 var_308 = const()[name = string("op_308"), val = fp32(0x1.0c6f7ap-20)]; tensor var_309 = add(x = variance_1, y = var_308)[name = string("op_309")]; fp32 var_310_epsilon_0 = const()[name = string("op_310_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_310 = rsqrt(epsilon = var_310_epsilon_0, x = var_309)[name = string("op_310")]; tensor hidden_states_5 = mul(x = hidden_states_1, y = var_310)[name = string("hidden_states_5")]; tensor const_1 = const()[name = string("const_1"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110158464)))]; tensor input_1 = mul(x = const_1, y = hidden_states_5)[name = string("input_1")]; tensor linear_0_bias_0 = const()[name = string("linear_0_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110162624)))]; tensor var_316 = linear(bias = linear_0_bias_0, weight = layers_0_self_attn_q_proj_weight_palettized, x = input_1)[name = string("linear_0")]; tensor var_321 = const()[name = string("op_321"), val = tensor([1, 1, 16, 128])]; tensor hidden_states_7 = reshape(shape = var_321, x = var_316)[name = string("hidden_states_7")]; tensor linear_1_bias_0 = const()[name = string("linear_1_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110170880)))]; tensor var_325 = linear(bias = linear_1_bias_0, weight = layers_0_self_attn_k_proj_weight_palettized, x = input_1)[name = string("linear_1")]; tensor var_330 = const()[name = string("op_330"), val = tensor([1, 1, 8, 128])]; tensor hidden_states_13 = reshape(shape = var_330, x = var_325)[name = string("hidden_states_13")]; tensor var_334 = linear(bias = linear_1_bias_0, weight = layers_0_self_attn_v_proj_weight_palettized, x = input_1)[name = string("linear_2")]; tensor var_339 = const()[name = string("op_339"), val = tensor([1, 1, 8, 128])]; tensor v_1 = reshape(shape = var_339, x = var_334)[name = string("v_1")]; fp32 var_344_promoted = const()[name = string("op_344_promoted"), val = fp32(0x1p+1)]; tensor var_350 = pow(x = hidden_states_7, y = var_344_promoted)[name = string("op_350")]; tensor variance_3_axes_0 = const()[name = string("variance_3_axes_0"), val = tensor([-1])]; bool variance_3_keep_dims_0 = const()[name = string("variance_3_keep_dims_0"), val = bool(true)]; tensor variance_3 = reduce_mean(axes = variance_3_axes_0, keep_dims = variance_3_keep_dims_0, x = var_350)[name = string("variance_3")]; fp32 var_353 = const()[name = string("op_353"), val = fp32(0x1.0c6f7ap-20)]; tensor var_354 = add(x = variance_3, y = var_353)[name = string("op_354")]; fp32 var_355_epsilon_0 = const()[name = string("op_355_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_355 = rsqrt(epsilon = var_355_epsilon_0, x = var_354)[name = string("op_355")]; tensor hidden_states_11 = mul(x = hidden_states_7, y = var_355)[name = string("hidden_states_11")]; tensor const_2 = const()[name = string("const_2"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110175040)))]; tensor q_1 = mul(x = const_2, y = hidden_states_11)[name = string("q_1")]; fp32 var_362_promoted = const()[name = string("op_362_promoted"), val = fp32(0x1p+1)]; tensor var_368 = pow(x = hidden_states_13, y = var_362_promoted)[name = string("op_368")]; tensor variance_5_axes_0 = const()[name = string("variance_5_axes_0"), val = tensor([-1])]; bool variance_5_keep_dims_0 = const()[name = string("variance_5_keep_dims_0"), val = bool(true)]; tensor variance_5 = reduce_mean(axes = variance_5_axes_0, keep_dims = variance_5_keep_dims_0, x = var_368)[name = string("variance_5")]; fp32 var_371 = const()[name = string("op_371"), val = fp32(0x1.0c6f7ap-20)]; tensor var_372 = add(x = variance_5, y = var_371)[name = string("op_372")]; fp32 var_373_epsilon_0 = const()[name = string("op_373_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_373 = rsqrt(epsilon = var_373_epsilon_0, x = var_372)[name = string("op_373")]; tensor hidden_states_17 = mul(x = hidden_states_13, y = var_373)[name = string("hidden_states_17")]; tensor const_3 = const()[name = string("const_3"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110175616)))]; tensor k_1 = mul(x = const_3, y = hidden_states_17)[name = string("k_1")]; tensor q_3_perm_0 = const()[name = string("q_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor k_3_perm_0 = const()[name = string("k_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor v_3_perm_0 = const()[name = string("v_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor cos_r_1_axes_0 = const()[name = string("cos_r_1_axes_0"), val = tensor([1])]; tensor cos_r_1 = expand_dims(axes = cos_r_1_axes_0, x = var_234)[name = string("cos_r_1")]; tensor sin_r_1_axes_0 = const()[name = string("sin_r_1_axes_0"), val = tensor([1])]; tensor sin_r_1 = expand_dims(axes = sin_r_1_axes_0, x = var_242)[name = string("sin_r_1")]; tensor q_3 = transpose(perm = q_3_perm_0, x = q_1)[name = string("transpose_19")]; tensor var_390 = mul(x = q_3, y = cos_r_1)[name = string("op_390")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_3)[name = string("x1_1")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_3)[name = string("x2_1")]; fp32 const_6_promoted = const()[name = string("const_6_promoted"), val = fp32(-0x1p+0)]; tensor var_411 = mul(x = x2_1, y = const_6_promoted)[name = string("op_411")]; int32 var_413 = const()[name = string("op_413"), val = int32(-1)]; bool var_414_interleave_0 = const()[name = string("op_414_interleave_0"), val = bool(false)]; tensor var_414 = concat(axis = var_413, interleave = var_414_interleave_0, values = (var_411, x1_1))[name = string("op_414")]; tensor var_415 = mul(x = var_414, y = sin_r_1)[name = string("op_415")]; tensor q_5 = add(x = var_390, y = var_415)[name = string("q_5")]; tensor k_3 = transpose(perm = k_3_perm_0, x = k_1)[name = string("transpose_18")]; tensor var_418 = mul(x = k_3, y = cos_r_1)[name = string("op_418")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_3)[name = string("x1_3")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_3)[name = string("x2_3")]; fp32 const_9_promoted = const()[name = string("const_9_promoted"), val = fp32(-0x1p+0)]; tensor var_439 = mul(x = x2_3, y = const_9_promoted)[name = string("op_439")]; int32 var_441 = const()[name = string("op_441"), val = int32(-1)]; bool var_442_interleave_0 = const()[name = string("op_442_interleave_0"), val = bool(false)]; tensor var_442 = concat(axis = var_441, interleave = var_442_interleave_0, values = (var_439, x1_3))[name = string("op_442")]; tensor var_443 = mul(x = var_442, y = sin_r_1)[name = string("op_443")]; tensor k_5 = add(x = var_418, y = var_443)[name = string("k_5")]; tensor var_450 = const()[name = string("op_450"), val = tensor([1, 1024, 1, 1])]; tensor nk_flat_1 = reshape(shape = var_450, x = k_5)[name = string("nk_flat_1")]; tensor var_456 = const()[name = string("op_456"), val = tensor([1, 1024, 1, 1])]; tensor v_3 = transpose(perm = v_3_perm_0, x = v_1)[name = string("transpose_17")]; tensor nv_flat_1 = reshape(shape = var_456, x = v_3)[name = string("nv_flat_1")]; tensor var_459_axes_0 = const()[name = string("op_459_axes_0"), val = tensor([1])]; tensor cast_3 = cast(dtype = cast_3_dtype_0, x = kv_cache_update_mask)[name = string("cast_5")]; tensor var_459 = expand_dims(axes = var_459_axes_0, x = cast_3)[name = string("op_459")]; tensor update_mask_1_axes_0 = const()[name = string("update_mask_1_axes_0"), val = tensor([2])]; tensor update_mask_1 = expand_dims(axes = update_mask_1_axes_0, x = var_459)[name = string("update_mask_1")]; fp32 var_462 = const()[name = string("op_462"), val = fp32(0x1p+0)]; tensor var_464 = sub(x = var_462, y = update_mask_1)[name = string("op_464")]; tensor var_465 = mul(x = var_259, y = var_464)[name = string("op_465")]; tensor var_466 = mul(x = nk_flat_1, y = update_mask_1)[name = string("op_466")]; tensor key_cache_5 = add(x = var_465, y = var_466)[name = string("key_cache_5")]; tensor var_472 = mul(x = var_279, y = var_464)[name = string("op_472")]; tensor var_473 = mul(x = nv_flat_1, y = update_mask_1)[name = string("op_473")]; tensor value_cache_5 = add(x = var_472, y = var_473)[name = string("value_cache_5")]; tensor var_477_axes_0 = const()[name = string("op_477_axes_0"), val = tensor([2])]; tensor var_477 = squeeze(axes = var_477_axes_0, x = key_cache_5)[name = string("op_477")]; tensor var_482 = const()[name = string("op_482"), val = tensor([1, 8, 128, 16])]; tensor kc_1 = reshape(shape = var_482, x = var_477)[name = string("kc_1")]; tensor var_485_axes_0 = const()[name = string("op_485_axes_0"), val = tensor([2])]; tensor var_485 = squeeze(axes = var_485_axes_0, x = value_cache_5)[name = string("op_485")]; tensor var_490 = const()[name = string("op_490"), val = tensor([1, 8, 128, 16])]; tensor vc_1 = reshape(shape = var_490, x = var_485)[name = string("vc_1")]; tensor var_493_axes_0 = const()[name = string("op_493_axes_0"), val = tensor([2])]; tensor var_493 = expand_dims(axes = var_493_axes_0, x = kc_1)[name = string("op_493")]; tensor var_501_reps_0 = const()[name = string("op_501_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_501 = tile(reps = var_501_reps_0, x = var_493)[name = string("op_501")]; tensor var_506 = const()[name = string("op_506"), val = tensor([1, 16, 128, 16])]; tensor kc_3 = reshape(shape = var_506, x = var_501)[name = string("kc_3")]; tensor var_509_axes_0 = const()[name = string("op_509_axes_0"), val = tensor([2])]; tensor var_509 = expand_dims(axes = var_509_axes_0, x = vc_1)[name = string("op_509")]; tensor var_517_reps_0 = const()[name = string("op_517_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_517 = tile(reps = var_517_reps_0, x = var_509)[name = string("op_517")]; tensor var_522 = const()[name = string("op_522"), val = tensor([1, 16, 128, 16])]; tensor vc_3 = reshape(shape = var_522, x = var_517)[name = string("vc_3")]; bool var_524_transpose_x_0 = const()[name = string("op_524_transpose_x_0"), val = bool(false)]; bool var_524_transpose_y_0 = const()[name = string("op_524_transpose_y_0"), val = bool(false)]; tensor var_524 = matmul(transpose_x = var_524_transpose_x_0, transpose_y = var_524_transpose_y_0, x = q_5, y = kc_3)[name = string("op_524")]; fp32 _inversed_attn_weights_1_y_0 = const()[name = string("_inversed_attn_weights_1_y_0"), val = fp32(0x1.6a09e6p-4)]; tensor _inversed_attn_weights_1 = mul(x = var_524, y = _inversed_attn_weights_1_y_0)[name = string("_inversed_attn_weights_1")]; tensor var_528_axes_0 = const()[name = string("op_528_axes_0"), val = tensor([1])]; tensor cast_2 = cast(dtype = cast_2_dtype_0, x = key_padding_mask)[name = string("cast_4")]; tensor var_528 = expand_dims(axes = var_528_axes_0, x = cast_2)[name = string("op_528")]; tensor mask_1_axes_0 = const()[name = string("mask_1_axes_0"), val = tensor([2])]; tensor mask_1 = expand_dims(axes = mask_1_axes_0, x = var_528)[name = string("mask_1")]; tensor attn_weights_3 = add(x = _inversed_attn_weights_1, y = mask_1)[name = string("attn_weights_3")]; int32 var_538 = const()[name = string("op_538"), val = int32(-1)]; tensor attn_weights_7 = softmax(axis = var_538, x = attn_weights_3)[name = string("attn_weights_7")]; bool attn_output_1_transpose_x_1 = const()[name = string("attn_output_1_transpose_x_1"), val = bool(false)]; bool attn_output_1_transpose_y_1 = const()[name = string("attn_output_1_transpose_y_1"), val = bool(true)]; tensor attn_output_1 = matmul(transpose_x = attn_output_1_transpose_x_1, transpose_y = attn_output_1_transpose_y_1, x = attn_weights_7, y = vc_3)[name = string("attn_output_1")]; tensor var_547_perm_0 = const()[name = string("op_547_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_551 = const()[name = string("op_551"), val = tensor([1, 1, -1])]; tensor var_547 = transpose(perm = var_547_perm_0, x = attn_output_1)[name = string("transpose_16")]; tensor input_3 = reshape(shape = var_551, x = var_547)[name = string("input_3")]; tensor attn_output_3 = linear(bias = linear_1_bias_0, weight = layers_0_self_attn_o_proj_weight_palettized, x = input_3)[name = string("linear_3")]; tensor var_557_axes_0 = const()[name = string("op_557_axes_0"), val = tensor([0])]; tensor var_557 = squeeze(axes = var_557_axes_0, x = attn_output_3)[name = string("op_557")]; tensor var_559_axes_0 = const()[name = string("op_559_axes_0"), val = tensor([0])]; tensor var_559 = squeeze(axes = var_559_axes_0, x = var_557)[name = string("op_559")]; tensor var_561_axes_0 = const()[name = string("op_561_axes_0"), val = tensor([-1])]; tensor var_561 = expand_dims(axes = var_561_axes_0, x = var_559)[name = string("op_561")]; tensor attn_4d_1_axes_0 = const()[name = string("attn_4d_1_axes_0"), val = tensor([-1])]; tensor attn_4d_1 = expand_dims(axes = attn_4d_1_axes_0, x = var_561)[name = string("attn_4d_1")]; tensor hidden_1 = add(x = cast_0, y = attn_4d_1)[name = string("hidden_1")]; tensor var_567_axes_0 = const()[name = string("op_567_axes_0"), val = tensor([-1])]; tensor var_567 = squeeze(axes = var_567_axes_0, x = hidden_1)[name = string("op_567")]; tensor var_569_axes_0 = const()[name = string("op_569_axes_0"), val = tensor([-1])]; tensor var_569 = squeeze(axes = var_569_axes_0, x = var_567)[name = string("op_569")]; tensor hidden_states_19_axes_0 = const()[name = string("hidden_states_19_axes_0"), val = tensor([0])]; tensor hidden_states_19 = expand_dims(axes = hidden_states_19_axes_0, x = var_569)[name = string("hidden_states_19")]; fp32 var_575_promoted = const()[name = string("op_575_promoted"), val = fp32(0x1p+1)]; tensor var_581 = pow(x = hidden_states_19, y = var_575_promoted)[name = string("op_581")]; tensor variance_7_axes_0 = const()[name = string("variance_7_axes_0"), val = tensor([-1])]; bool variance_7_keep_dims_0 = const()[name = string("variance_7_keep_dims_0"), val = bool(true)]; tensor variance_7 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = var_581)[name = string("variance_7")]; fp32 var_584 = const()[name = string("op_584"), val = fp32(0x1.0c6f7ap-20)]; tensor var_585 = add(x = variance_7, y = var_584)[name = string("op_585")]; fp32 var_586_epsilon_0 = const()[name = string("op_586_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_586 = rsqrt(epsilon = var_586_epsilon_0, x = var_585)[name = string("op_586")]; tensor hidden_states_23 = mul(x = hidden_states_19, y = var_586)[name = string("hidden_states_23")]; tensor const_10 = const()[name = string("const_10"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110176192)))]; tensor input_5 = mul(x = const_10, y = hidden_states_23)[name = string("input_5")]; tensor