program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.11.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})] { func main(tensor causal_mask, tensor conv_state_in, tensor input_ids, state> kv_cache_0, tensor position_ids, tensor update_mask) { tensor layers_14_self_attn_k_layernorm_weight = const()[name = string("layers_14_self_attn_k_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; tensor layers_14_self_attn_q_layernorm_weight = const()[name = string("layers_14_self_attn_q_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256)))]; tensor layers_12_self_attn_k_layernorm_weight = const()[name = string("layers_12_self_attn_k_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(448)))]; tensor layers_12_self_attn_q_layernorm_weight = const()[name = string("layers_12_self_attn_q_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640)))]; tensor layers_10_self_attn_k_layernorm_weight = const()[name = string("layers_10_self_attn_k_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832)))]; tensor layers_10_self_attn_q_layernorm_weight = const()[name = string("layers_10_self_attn_q_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1024)))]; tensor layers_8_self_attn_k_layernorm_weight = const()[name = string("layers_8_self_attn_k_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1216)))]; tensor layers_8_self_attn_q_layernorm_weight = const()[name = string("layers_8_self_attn_q_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1408)))]; tensor layers_5_self_attn_k_layernorm_weight = const()[name = string("layers_5_self_attn_k_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1600)))]; tensor layers_5_self_attn_q_layernorm_weight = const()[name = string("layers_5_self_attn_q_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1792)))]; tensor layers_2_self_attn_k_layernorm_weight = const()[name = string("layers_2_self_attn_k_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1984)))]; tensor layers_2_self_attn_q_layernorm_weight = const()[name = string("layers_2_self_attn_q_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2176)))]; tensor layers_0_operator_norm_weight = const()[name = string("layers_0_operator_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2368)))]; tensor sin_cached = const()[name = string("sin_cached"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4480)))]; tensor cos_cached = const()[name = string("cos_cached"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528832)))]; tensor layers_0_conv_in_proj_weight = const()[name = string("layers_0_conv_in_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1053184)))]; tensor layers_0_feed_forward_w1_weight = const()[name = string("layers_0_feed_forward_w1_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7344704)))]; tensor layers_0_feed_forward_w3_weight = const()[name = string("layers_0_feed_forward_w3_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16781952)))]; tensor layers_0_feed_forward_w2_weight = const()[name = string("layers_0_feed_forward_w2_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26219200)))]; tensor layers_1_conv_in_proj_weight = const()[name = string("layers_1_conv_in_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35656448)))]; tensor layers_1_feed_forward_w1_weight = const()[name = string("layers_1_feed_forward_w1_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41947968)))]; tensor layers_1_feed_forward_w3_weight = const()[name = string("layers_1_feed_forward_w3_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51385216)))]; tensor layers_1_feed_forward_w2_weight = const()[name = string("layers_1_feed_forward_w2_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60822464)))]; tensor layers_2_self_attn_q_proj_weight = const()[name = string("layers_2_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70259712)))]; tensor layers_2_self_attn_k_proj_weight = const()[name = string("layers_2_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72356928)))]; tensor layers_2_self_attn_v_proj_weight = const()[name = string("layers_2_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73405568)))]; tensor layers_2_feed_forward_w1_weight = const()[name = string("layers_2_feed_forward_w1_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74454208)))]; tensor layers_2_feed_forward_w3_weight = const()[name = string("layers_2_feed_forward_w3_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83891456)))]; tensor layers_2_feed_forward_w2_weight = const()[name = string("layers_2_feed_forward_w2_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93328704)))]; tensor layers_3_conv_in_proj_weight = const()[name = string("layers_3_conv_in_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102765952)))]; tensor layers_3_feed_forward_w1_weight = const()[name = string("layers_3_feed_forward_w1_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109057472)))]; tensor layers_3_feed_forward_w3_weight = const()[name = string("layers_3_feed_forward_w3_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118494720)))]; tensor layers_3_feed_forward_w2_weight = const()[name = string("layers_3_feed_forward_w2_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127931968)))]; tensor layers_4_conv_in_proj_weight = const()[name = string("layers_4_conv_in_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137369216)))]; tensor layers_4_feed_forward_w1_weight = const()[name = string("layers_4_feed_forward_w1_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143660736)))]; tensor layers_4_feed_forward_w3_weight = const()[name = string("layers_4_feed_forward_w3_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153097984)))]; tensor layers_4_feed_forward_w2_weight = const()[name = string("layers_4_feed_forward_w2_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162535232)))]; tensor layers_5_self_attn_q_proj_weight = const()[name = string("layers_5_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171972480)))]; tensor layers_5_self_attn_k_proj_weight = const()[name = string("layers_5_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174069696)))]; tensor layers_5_self_attn_v_proj_weight = const()[name = string("layers_5_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175118336)))]; tensor layers_5_feed_forward_w1_weight = const()[name = string("layers_5_feed_forward_w1_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176166976)))]; tensor layers_5_feed_forward_w3_weight = const()[name = string("layers_5_feed_forward_w3_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185604224)))]; tensor layers_5_feed_forward_w2_weight = const()[name = string("layers_5_feed_forward_w2_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195041472)))]; tensor layers_6_conv_in_proj_weight = const()[name = string("layers_6_conv_in_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204478720)))]; tensor layers_6_feed_forward_w1_weight = const()[name = string("layers_6_feed_forward_w1_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210770240)))]; tensor layers_6_feed_forward_w3_weight = const()[name = string("layers_6_feed_forward_w3_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220207488)))]; tensor layers_6_feed_forward_w2_weight = const()[name = string("layers_6_feed_forward_w2_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229644736)))]; tensor layers_7_conv_in_proj_weight = const()[name = string("layers_7_conv_in_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239081984)))]; tensor layers_7_feed_forward_w1_weight = const()[name = string("layers_7_feed_forward_w1_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245373504)))]; tensor layers_7_feed_forward_w3_weight = const()[name = string("layers_7_feed_forward_w3_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254810752)))]; tensor layers_7_feed_forward_w2_weight = const()[name = string("layers_7_feed_forward_w2_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264248000)))]; tensor layers_8_self_attn_q_proj_weight = const()[name = string("layers_8_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273685248)))]; tensor layers_8_self_attn_k_proj_weight = const()[name = string("layers_8_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275782464)))]; tensor layers_8_self_attn_v_proj_weight = const()[name = string("layers_8_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(276831104)))]; tensor layers_8_feed_forward_w1_weight = const()[name = string("layers_8_feed_forward_w1_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277879744)))]; tensor layers_8_feed_forward_w3_weight = const()[name = string("layers_8_feed_forward_w3_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287316992)))]; tensor layers_8_feed_forward_w2_weight = const()[name = string("layers_8_feed_forward_w2_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296754240)))]; tensor layers_9_conv_in_proj_weight = const()[name = string("layers_9_conv_in_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306191488)))]; tensor layers_9_feed_forward_w1_weight = const()[name = string("layers_9_feed_forward_w1_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312483008)))]; tensor layers_9_feed_forward_w3_weight = const()[name = string("layers_9_feed_forward_w3_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321920256)))]; tensor layers_9_feed_forward_w2_weight = const()[name = string("layers_9_feed_forward_w2_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(331357504)))]; tensor layers_10_self_attn_q_proj_weight = const()[name = string("layers_10_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340794752)))]; tensor layers_10_self_attn_k_proj_weight = const()[name = string("layers_10_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342891968)))]; tensor layers_10_self_attn_v_proj_weight = const()[name = string("layers_10_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343940608)))]; tensor layers_10_feed_forward_w1_weight = const()[name = string("layers_10_feed_forward_w1_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344989248)))]; tensor layers_10_feed_forward_w3_weight = const()[name = string("layers_10_feed_forward_w3_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354426496)))]; tensor layers_10_feed_forward_w2_weight = const()[name = string("layers_10_feed_forward_w2_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363863744)))]; tensor layers_11_conv_in_proj_weight = const()[name = string("layers_11_conv_in_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373300992)))]; tensor layers_11_feed_forward_w1_weight = const()[name = string("layers_11_feed_forward_w1_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379592512)))]; tensor layers_11_feed_forward_w3_weight = const()[name = string("layers_11_feed_forward_w3_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389029760)))]; tensor layers_11_feed_forward_w2_weight = const()[name = string("layers_11_feed_forward_w2_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(398467008)))]; tensor layers_12_self_attn_q_proj_weight = const()[name = string("layers_12_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407904256)))]; tensor layers_12_self_attn_k_proj_weight = const()[name = string("layers_12_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410001472)))]; tensor layers_12_self_attn_v_proj_weight = const()[name = string("layers_12_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(411050112)))]; tensor layers_12_feed_forward_w1_weight = const()[name = string("layers_12_feed_forward_w1_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412098752)))]; tensor layers_12_feed_forward_w3_weight = const()[name = string("layers_12_feed_forward_w3_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421536000)))]; tensor layers_12_feed_forward_w2_weight = const()[name = string("layers_12_feed_forward_w2_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430973248)))]; tensor layers_13_conv_in_proj_weight = const()[name = string("layers_13_conv_in_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440410496)))]; tensor layers_13_feed_forward_w1_weight = const()[name = string("layers_13_feed_forward_w1_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446702016)))]; tensor layers_13_feed_forward_w3_weight = const()[name = string("layers_13_feed_forward_w3_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456139264)))]; tensor layers_13_feed_forward_w2_weight = const()[name = string("layers_13_feed_forward_w2_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465576512)))]; tensor layers_14_self_attn_q_proj_weight = const()[name = string("layers_14_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475013760)))]; tensor layers_14_self_attn_k_proj_weight = const()[name = string("layers_14_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477110976)))]; tensor layers_14_self_attn_v_proj_weight = const()[name = string("layers_14_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478159616)))]; tensor layers_14_feed_forward_w1_weight = const()[name = string("layers_14_feed_forward_w1_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479208256)))]; tensor layers_14_feed_forward_w3_weight = const()[name = string("layers_14_feed_forward_w3_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488645504)))]; tensor layers_14_feed_forward_w2_weight = const()[name = string("layers_14_feed_forward_w2_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498082752)))]; tensor layers_15_conv_in_proj_weight = const()[name = string("layers_15_conv_in_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(507520000)))]; tensor layers_15_feed_forward_w1_weight = const()[name = string("layers_15_feed_forward_w1_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513811520)))]; tensor layers_15_feed_forward_w3_weight = const()[name = string("layers_15_feed_forward_w3_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523248768)))]; tensor layers_15_feed_forward_w2_weight = const()[name = string("layers_15_feed_forward_w2_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(532686016)))]; tensor lm_head_weight = const()[name = string("lm_head_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542123264)))]; int32 var_612_batch_dims_0 = const()[name = string("op_612_batch_dims_0"), val = int32(0)]; bool var_612_validate_indices_0 = const()[name = string("op_612_validate_indices_0"), val = bool(false)]; tensor embed_tokens_weight_to_fp16 = const()[name = string("embed_tokens_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676341056)))]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = input_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_12 = const()[name = string("slice_by_index_12"), val = int32(65536)]; tensor add_0 = add(x = input_ids, y = slice_by_index_12)[name = string("add_0")]; tensor select_0 = select(a = input_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 var_612_cast_fp16_axis_0 = const()[name = string("op_612_cast_fp16_axis_0"), val = int32(0)]; tensor var_612_cast_fp16 = gather(axis = var_612_cast_fp16_axis_0, batch_dims = var_612_batch_dims_0, indices = select_0, validate_indices = var_612_validate_indices_0, x = embed_tokens_weight_to_fp16)[name = string("op_612_cast_fp16")]; int32 var_618 = const()[name = string("op_618"), val = int32(0)]; int32 var_619_batch_dims_0 = const()[name = string("op_619_batch_dims_0"), val = int32(0)]; bool var_619_validate_indices_0 = const()[name = string("op_619_validate_indices_0"), val = bool(false)]; string position_ids_to_uint16_dtype_0 = const()[name = string("position_ids_to_uint16_dtype_0"), val = string("uint16")]; tensor position_ids_to_uint16 = cast(dtype = position_ids_to_uint16_dtype_0, x = position_ids)[name = string("cast_44")]; tensor var_619_cast_uint16 = gather(axis = var_618, batch_dims = var_619_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_619_validate_indices_0, x = cos_cached)[name = string("op_619_cast_uint16")]; tensor var_624 = const()[name = string("op_624"), val = tensor([1, 1, 1, 64])]; tensor cos = reshape(shape = var_624, x = var_619_cast_uint16)[name = string("cos")]; int32 var_626 = const()[name = string("op_626"), val = int32(0)]; int32 var_627_batch_dims_0 = const()[name = string("op_627_batch_dims_0"), val = int32(0)]; bool var_627_validate_indices_0 = const()[name = string("op_627_validate_indices_0"), val = bool(false)]; tensor var_627_cast_uint16 = gather(axis = var_626, batch_dims = var_627_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_627_validate_indices_0, x = sin_cached)[name = string("op_627_cast_uint16")]; tensor var_632 = const()[name = string("op_632"), val = tensor([1, 1, 1, 64])]; tensor sin = reshape(shape = var_632, x = var_627_cast_uint16)[name = string("sin")]; fp16 const_0_promoted = const()[name = string("const_0_promoted"), val = fp16(-0x1p+0)]; tensor var_634 = mul(x = var_612_cast_fp16, y = const_0_promoted)[name = string("op_634")]; int32 var_636 = const()[name = string("op_636"), val = int32(-1)]; bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; tensor input_1 = concat(axis = var_636, interleave = input_1_interleave_0, values = (var_612_cast_fp16, var_634))[name = string("input_1")]; tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; fp16 var_642_to_fp16 = const()[name = string("op_642_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_642_to_fp16, x = input_1)[name = string("normed_1_cast_fp16")]; tensor var_645_split_sizes_0 = const()[name = string("op_645_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_645_axis_0 = const()[name = string("op_645_axis_0"), val = int32(-1)]; tensor var_645_0, tensor var_645_1 = split(axis = var_645_axis_0, split_sizes = var_645_split_sizes_0, x = normed_1_cast_fp16)[name = string("op_645")]; tensor hidden_states_1 = mul(x = var_645_0, y = layers_0_operator_norm_weight)[name = string("hidden_states_1")]; tensor var_651 = const()[name = string("op_651"), val = tensor([0, 2, 1])]; tensor var_654_axes_0 = const()[name = string("op_654_axes_0"), val = tensor([2])]; tensor var_652 = transpose(perm = var_651, x = hidden_states_1)[name = string("transpose_161")]; tensor var_654 = expand_dims(axes = var_654_axes_0, x = var_652)[name = string("op_654")]; string BCx_1_pad_type_0 = const()[name = string("BCx_1_pad_type_0"), val = string("valid")]; tensor BCx_1_strides_0 = const()[name = string("BCx_1_strides_0"), val = tensor([1, 1])]; tensor BCx_1_pad_0 = const()[name = string("BCx_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor BCx_1_dilations_0 = const()[name = string("BCx_1_dilations_0"), val = tensor([1, 1])]; int32 BCx_1_groups_0 = const()[name = string("BCx_1_groups_0"), val = int32(1)]; tensor BCx_1 = conv(dilations = BCx_1_dilations_0, groups = BCx_1_groups_0, pad = BCx_1_pad_0, pad_type = BCx_1_pad_type_0, strides = BCx_1_strides_0, weight = layers_0_conv_in_proj_weight, x = var_654)[name = string("BCx_1")]; tensor var_671_split_sizes_0 = const()[name = string("op_671_split_sizes_0"), val = tensor([1024, 1024, 1024])]; int32 var_671_axis_0 = const()[name = string("op_671_axis_0"), val = int32(1)]; tensor var_671_0, tensor var_671_1, tensor var_671_2 = split(axis = var_671_axis_0, split_sizes = var_671_split_sizes_0, x = BCx_1)[name = string("op_671")]; tensor Bx_1 = mul(x = var_671_0, y = var_671_2)[name = string("Bx_1")]; tensor var_677_begin_0 = const()[name = string("op_677_begin_0"), val = tensor([0, 0, 0])]; tensor var_677_end_0 = const()[name = string("op_677_end_0"), val = tensor([1, 1024, 3])]; tensor var_677_end_mask_0 = const()[name = string("op_677_end_mask_0"), val = tensor([false, true, true])]; tensor var_677_squeeze_mask_0 = const()[name = string("op_677_squeeze_mask_0"), val = tensor([true, false, false])]; tensor var_677_cast_fp16 = slice_by_index(begin = var_677_begin_0, end = var_677_end_0, end_mask = var_677_end_mask_0, squeeze_mask = var_677_squeeze_mask_0, x = conv_state_in)[name = string("op_677_cast_fp16")]; tensor var_679_axes_0 = const()[name = string("op_679_axes_0"), val = tensor([0])]; tensor var_679_cast_fp16 = expand_dims(axes = var_679_axes_0, x = var_677_cast_fp16)[name = string("op_679_cast_fp16")]; tensor slot_1_axes_0 = const()[name = string("slot_1_axes_0"), val = tensor([2])]; tensor slot_1_cast_fp16 = expand_dims(axes = slot_1_axes_0, x = var_679_cast_fp16)[name = string("slot_1_cast_fp16")]; tensor live_tail_1_begin_0 = const()[name = string("live_tail_1_begin_0"), val = tensor([0, 0, 0, 1])]; tensor live_tail_1_end_0 = const()[name = string("live_tail_1_end_0"), val = tensor([1, 1024, 1, 1])]; tensor live_tail_1_end_mask_0 = const()[name = string("live_tail_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor live_tail_1_cast_fp16 = slice_by_index(begin = live_tail_1_begin_0, end = live_tail_1_end_0, end_mask = live_tail_1_end_mask_0, x = slot_1_cast_fp16)[name = string("live_tail_1_cast_fp16")]; int32 var_688 = const()[name = string("op_688"), val = int32(-1)]; bool new_state_1_interleave_0 = const()[name = string("new_state_1_interleave_0"), val = bool(false)]; tensor new_state_1_cast_fp16 = concat(axis = var_688, interleave = new_state_1_interleave_0, values = (live_tail_1_cast_fp16, Bx_1))[name = string("new_state_1_cast_fp16")]; tensor var_691_axes_0 = const()[name = string("op_691_axes_0"), val = tensor([0])]; tensor var_691_cast_fp16 = squeeze(axes = var_691_axes_0, x = new_state_1_cast_fp16)[name = string("op_691_cast_fp16")]; tensor var_693_axes_0 = const()[name = string("op_693_axes_0"), val = tensor([1])]; tensor var_693_cast_fp16 = squeeze(axes = var_693_axes_0, x = var_691_cast_fp16)[name = string("op_693_cast_fp16")]; string conv_out_1_pad_type_0 = const()[name = string("conv_out_1_pad_type_0"), val = string("valid")]; int32 conv_out_1_groups_0 = const()[name = string("conv_out_1_groups_0"), val = int32(1024)]; tensor conv_out_1_strides_0 = const()[name = string("conv_out_1_strides_0"), val = tensor([1, 1])]; tensor conv_out_1_pad_0 = const()[name = string("conv_out_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor conv_out_1_dilations_0 = const()[name = string("conv_out_1_dilations_0"), val = tensor([1, 1])]; tensor layers_0_conv_conv_weight_promoted_to_fp16 = const()[name = string("layers_0_conv_conv_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(810558848)))]; tensor conv_out_1_cast_fp16 = conv(dilations = conv_out_1_dilations_0, groups = conv_out_1_groups_0, pad = conv_out_1_pad_0, pad_type = conv_out_1_pad_type_0, strides = conv_out_1_strides_0, weight = layers_0_conv_conv_weight_promoted_to_fp16, x = new_state_1_cast_fp16)[name = string("conv_out_1_cast_fp16")]; tensor input_5_cast_fp16 = mul(x = var_671_1, y = conv_out_1_cast_fp16)[name = string("input_5_cast_fp16")]; string y_1_pad_type_0 = const()[name = string("y_1_pad_type_0"), val = string("valid")]; tensor y_1_strides_0 = const()[name = string("y_1_strides_0"), val = tensor([1, 1])]; tensor y_1_pad_0 = const()[name = string("y_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor y_1_dilations_0 = const()[name = string("y_1_dilations_0"), val = tensor([1, 1])]; int32 y_1_groups_0 = const()[name = string("y_1_groups_0"), val = int32(1)]; tensor layers_0_conv_out_proj_weight_promoted_to_fp16 = const()[name = string("layers_0_conv_out_proj_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(810565056)))]; tensor y_1_cast_fp16 = conv(dilations = y_1_dilations_0, groups = y_1_groups_0, pad = y_1_pad_0, pad_type = y_1_pad_type_0, strides = y_1_strides_0, weight = layers_0_conv_out_proj_weight_promoted_to_fp16, x = input_5_cast_fp16)[name = string("y_1_cast_fp16")]; tensor var_719_axes_0 = const()[name = string("op_719_axes_0"), val = tensor([2])]; tensor var_719_cast_fp16 = squeeze(axes = var_719_axes_0, x = y_1_cast_fp16)[name = string("op_719_cast_fp16")]; tensor var_723 = const()[name = string("op_723"), val = tensor([0, 2, 1])]; tensor op_out_1_cast_fp16 = transpose(perm = var_723, x = var_719_cast_fp16)[name = string("transpose_160")]; tensor x_3_cast_fp16 = add(x = var_612_cast_fp16, y = op_out_1_cast_fp16)[name = string("x_3_cast_fp16")]; fp16 const_1_promoted_to_fp16 = const()[name = string("const_1_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_727_cast_fp16 = mul(x = x_3_cast_fp16, y = const_1_promoted_to_fp16)[name = string("op_727_cast_fp16")]; int32 var_729 = const()[name = string("op_729"), val = int32(-1)]; bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)]; tensor input_7_cast_fp16 = concat(axis = var_729, interleave = input_7_interleave_0, values = (x_3_cast_fp16, var_727_cast_fp16))[name = string("input_7_cast_fp16")]; tensor normed_3_axes_0 = const()[name = string("normed_3_axes_0"), val = tensor([-1])]; fp16 var_735_to_fp16 = const()[name = string("op_735_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_3_cast_fp16 = layer_norm(axes = normed_3_axes_0, epsilon = var_735_to_fp16, x = input_7_cast_fp16)[name = string("normed_3_cast_fp16")]; tensor var_738_split_sizes_0 = const()[name = string("op_738_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_738_axis_0 = const()[name = string("op_738_axis_0"), val = int32(-1)]; tensor var_738_cast_fp16_0, tensor var_738_cast_fp16_1 = split(axis = var_738_axis_0, split_sizes = var_738_split_sizes_0, x = normed_3_cast_fp16)[name = string("op_738_cast_fp16")]; tensor layers_0_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_0_ffn_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(812662272)))]; tensor normed_5_cast_fp16 = mul(x = var_738_cast_fp16_0, y = layers_0_ffn_norm_weight_promoted_to_fp16)[name = string("normed_5_cast_fp16")]; tensor var_744 = const()[name = string("op_744"), val = tensor([0, 2, 1])]; tensor var_747_axes_0 = const()[name = string("op_747_axes_0"), val = tensor([2])]; tensor var_745_cast_fp16 = transpose(perm = var_744, x = normed_5_cast_fp16)[name = string("transpose_159")]; tensor var_747_cast_fp16 = expand_dims(axes = var_747_axes_0, x = var_745_cast_fp16)[name = string("op_747_cast_fp16")]; string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = layers_0_feed_forward_w1_weight, x = var_747_cast_fp16)[name = string("input_11")]; string b_1_pad_type_0 = const()[name = string("b_1_pad_type_0"), val = string("valid")]; tensor b_1_strides_0 = const()[name = string("b_1_strides_0"), val = tensor([1, 1])]; tensor b_1_pad_0 = const()[name = string("b_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_1_dilations_0 = const()[name = string("b_1_dilations_0"), val = tensor([1, 1])]; int32 b_1_groups_0 = const()[name = string("b_1_groups_0"), val = int32(1)]; tensor b_1 = conv(dilations = b_1_dilations_0, groups = b_1_groups_0, pad = b_1_pad_0, pad_type = b_1_pad_type_0, strides = b_1_strides_0, weight = layers_0_feed_forward_w3_weight, x = var_747_cast_fp16)[name = string("b_1")]; tensor var_775 = silu(x = input_11)[name = string("op_775")]; tensor input_13 = mul(x = var_775, y = b_1)[name = string("input_13")]; string mlp_1_pad_type_0 = const()[name = string("mlp_1_pad_type_0"), val = string("valid")]; tensor mlp_1_strides_0 = const()[name = string("mlp_1_strides_0"), val = tensor([1, 1])]; tensor mlp_1_pad_0 = const()[name = string("mlp_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_1_dilations_0 = const()[name = string("mlp_1_dilations_0"), val = tensor([1, 1])]; int32 mlp_1_groups_0 = const()[name = string("mlp_1_groups_0"), val = int32(1)]; tensor mlp_1 = conv(dilations = mlp_1_dilations_0, groups = mlp_1_groups_0, pad = mlp_1_pad_0, pad_type = mlp_1_pad_type_0, strides = mlp_1_strides_0, weight = layers_0_feed_forward_w2_weight, x = input_13)[name = string("mlp_1")]; tensor var_789_axes_0 = const()[name = string("op_789_axes_0"), val = tensor([2])]; tensor var_789 = squeeze(axes = var_789_axes_0, x = mlp_1)[name = string("op_789")]; tensor var_793 = const()[name = string("op_793"), val = tensor([0, 2, 1])]; tensor mlp_3 = transpose(perm = var_793, x = var_789)[name = string("transpose_158")]; tensor x_5_cast_fp16 = add(x = x_3_cast_fp16, y = mlp_3)[name = string("x_5_cast_fp16")]; fp16 const_2_promoted_to_fp16 = const()[name = string("const_2_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_797_cast_fp16 = mul(x = x_5_cast_fp16, y = const_2_promoted_to_fp16)[name = string("op_797_cast_fp16")]; int32 var_799 = const()[name = string("op_799"), val = int32(-1)]; bool input_15_interleave_0 = const()[name = string("input_15_interleave_0"), val = bool(false)]; tensor input_15_cast_fp16 = concat(axis = var_799, interleave = input_15_interleave_0, values = (x_5_cast_fp16, var_797_cast_fp16))[name = string("input_15_cast_fp16")]; tensor normed_7_axes_0 = const()[name = string("normed_7_axes_0"), val = tensor([-1])]; fp16 var_805_to_fp16 = const()[name = string("op_805_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_7_cast_fp16 = layer_norm(axes = normed_7_axes_0, epsilon = var_805_to_fp16, x = input_15_cast_fp16)[name = string("normed_7_cast_fp16")]; tensor var_808_split_sizes_0 = const()[name = string("op_808_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_808_axis_0 = const()[name = string("op_808_axis_0"), val = int32(-1)]; tensor var_808_cast_fp16_0, tensor var_808_cast_fp16_1 = split(axis = var_808_axis_0, split_sizes = var_808_split_sizes_0, x = normed_7_cast_fp16)[name = string("op_808_cast_fp16")]; tensor layers_1_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_1_operator_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(812664384)))]; tensor hidden_states_3_cast_fp16 = mul(x = var_808_cast_fp16_0, y = layers_1_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_3_cast_fp16")]; tensor var_814 = const()[name = string("op_814"), val = tensor([0, 2, 1])]; tensor var_817_axes_0 = const()[name = string("op_817_axes_0"), val = tensor([2])]; tensor var_815_cast_fp16 = transpose(perm = var_814, x = hidden_states_3_cast_fp16)[name = string("transpose_157")]; tensor var_817_cast_fp16 = expand_dims(axes = var_817_axes_0, x = var_815_cast_fp16)[name = string("op_817_cast_fp16")]; string BCx_3_pad_type_0 = const()[name = string("BCx_3_pad_type_0"), val = string("valid")]; tensor BCx_3_strides_0 = const()[name = string("BCx_3_strides_0"), val = tensor([1, 1])]; tensor BCx_3_pad_0 = const()[name = string("BCx_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor BCx_3_dilations_0 = const()[name = string("BCx_3_dilations_0"), val = tensor([1, 1])]; int32 BCx_3_groups_0 = const()[name = string("BCx_3_groups_0"), val = int32(1)]; tensor BCx_3 = conv(dilations = BCx_3_dilations_0, groups = BCx_3_groups_0, pad = BCx_3_pad_0, pad_type = BCx_3_pad_type_0, strides = BCx_3_strides_0, weight = layers_1_conv_in_proj_weight, x = var_817_cast_fp16)[name = string("BCx_3")]; tensor var_834_split_sizes_0 = const()[name = string("op_834_split_sizes_0"), val = tensor([1024, 1024, 1024])]; int32 var_834_axis_0 = const()[name = string("op_834_axis_0"), val = int32(1)]; tensor var_834_0, tensor var_834_1, tensor var_834_2 = split(axis = var_834_axis_0, split_sizes = var_834_split_sizes_0, x = BCx_3)[name = string("op_834")]; tensor Bx_3 = mul(x = var_834_0, y = var_834_2)[name = string("Bx_3")]; tensor var_840_begin_0 = const()[name = string("op_840_begin_0"), val = tensor([1, 0, 0])]; tensor var_840_end_0 = const()[name = string("op_840_end_0"), val = tensor([2, 1024, 3])]; tensor var_840_end_mask_0 = const()[name = string("op_840_end_mask_0"), val = tensor([false, true, true])]; tensor var_840_squeeze_mask_0 = const()[name = string("op_840_squeeze_mask_0"), val = tensor([true, false, false])]; tensor var_840_cast_fp16 = slice_by_index(begin = var_840_begin_0, end = var_840_end_0, end_mask = var_840_end_mask_0, squeeze_mask = var_840_squeeze_mask_0, x = conv_state_in)[name = string("op_840_cast_fp16")]; tensor var_842_axes_0 = const()[name = string("op_842_axes_0"), val = tensor([0])]; tensor var_842_cast_fp16 = expand_dims(axes = var_842_axes_0, x = var_840_cast_fp16)[name = string("op_842_cast_fp16")]; tensor slot_3_axes_0 = const()[name = string("slot_3_axes_0"), val = tensor([2])]; tensor slot_3_cast_fp16 = expand_dims(axes = slot_3_axes_0, x = var_842_cast_fp16)[name = string("slot_3_cast_fp16")]; tensor live_tail_3_begin_0 = const()[name = string("live_tail_3_begin_0"), val = tensor([0, 0, 0, 1])]; tensor live_tail_3_end_0 = const()[name = string("live_tail_3_end_0"), val = tensor([1, 1024, 1, 1])]; tensor live_tail_3_end_mask_0 = const()[name = string("live_tail_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor live_tail_3_cast_fp16 = slice_by_index(begin = live_tail_3_begin_0, end = live_tail_3_end_0, end_mask = live_tail_3_end_mask_0, x = slot_3_cast_fp16)[name = string("live_tail_3_cast_fp16")]; int32 var_851 = const()[name = string("op_851"), val = int32(-1)]; bool new_state_3_interleave_0 = const()[name = string("new_state_3_interleave_0"), val = bool(false)]; tensor new_state_3_cast_fp16 = concat(axis = var_851, interleave = new_state_3_interleave_0, values = (live_tail_3_cast_fp16, Bx_3))[name = string("new_state_3_cast_fp16")]; tensor var_854_axes_0 = const()[name = string("op_854_axes_0"), val = tensor([0])]; tensor var_854_cast_fp16 = squeeze(axes = var_854_axes_0, x = new_state_3_cast_fp16)[name = string("op_854_cast_fp16")]; tensor var_856_axes_0 = const()[name = string("op_856_axes_0"), val = tensor([1])]; tensor var_856_cast_fp16 = squeeze(axes = var_856_axes_0, x = var_854_cast_fp16)[name = string("op_856_cast_fp16")]; string conv_out_3_pad_type_0 = const()[name = string("conv_out_3_pad_type_0"), val = string("valid")]; int32 conv_out_3_groups_0 = const()[name = string("conv_out_3_groups_0"), val = int32(1024)]; tensor conv_out_3_strides_0 = const()[name = string("conv_out_3_strides_0"), val = tensor([1, 1])]; tensor conv_out_3_pad_0 = const()[name = string("conv_out_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor conv_out_3_dilations_0 = const()[name = string("conv_out_3_dilations_0"), val = tensor([1, 1])]; tensor layers_1_conv_conv_weight_promoted_to_fp16 = const()[name = string("layers_1_conv_conv_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(812666496)))]; tensor conv_out_3_cast_fp16 = conv(dilations = conv_out_3_dilations_0, groups = conv_out_3_groups_0, pad = conv_out_3_pad_0, pad_type = conv_out_3_pad_type_0, strides = conv_out_3_strides_0, weight = layers_1_conv_conv_weight_promoted_to_fp16, x = new_state_3_cast_fp16)[name = string("conv_out_3_cast_fp16")]; tensor input_19_cast_fp16 = mul(x = var_834_1, y = conv_out_3_cast_fp16)[name = string("input_19_cast_fp16")]; string y_3_pad_type_0 = const()[name = string("y_3_pad_type_0"), val = string("valid")]; tensor y_3_strides_0 = const()[name = string("y_3_strides_0"), val = tensor([1, 1])]; tensor y_3_pad_0 = const()[name = string("y_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor y_3_dilations_0 = const()[name = string("y_3_dilations_0"), val = tensor([1, 1])]; int32 y_3_groups_0 = const()[name = string("y_3_groups_0"), val = int32(1)]; tensor layers_1_conv_out_proj_weight_promoted_to_fp16 = const()[name = string("layers_1_conv_out_proj_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(812672704)))]; tensor y_3_cast_fp16 = conv(dilations = y_3_dilations_0, groups = y_3_groups_0, pad = y_3_pad_0, pad_type = y_3_pad_type_0, strides = y_3_strides_0, weight = layers_1_conv_out_proj_weight_promoted_to_fp16, x = input_19_cast_fp16)[name = string("y_3_cast_fp16")]; tensor var_882_axes_0 = const()[name = string("op_882_axes_0"), val = tensor([2])]; tensor var_882_cast_fp16 = squeeze(axes = var_882_axes_0, x = y_3_cast_fp16)[name = string("op_882_cast_fp16")]; tensor var_886 = const()[name = string("op_886"), val = tensor([0, 2, 1])]; tensor op_out_3_cast_fp16 = transpose(perm = var_886, x = var_882_cast_fp16)[name = string("transpose_156")]; tensor x_7_cast_fp16 = add(x = x_5_cast_fp16, y = op_out_3_cast_fp16)[name = string("x_7_cast_fp16")]; fp16 const_3_promoted_to_fp16 = const()[name = string("const_3_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_890_cast_fp16 = mul(x = x_7_cast_fp16, y = const_3_promoted_to_fp16)[name = string("op_890_cast_fp16")]; int32 var_892 = const()[name = string("op_892"), val = int32(-1)]; bool input_21_interleave_0 = const()[name = string("input_21_interleave_0"), val = bool(false)]; tensor input_21_cast_fp16 = concat(axis = var_892, interleave = input_21_interleave_0, values = (x_7_cast_fp16, var_890_cast_fp16))[name = string("input_21_cast_fp16")]; tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; fp16 var_898_to_fp16 = const()[name = string("op_898_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_898_to_fp16, x = input_21_cast_fp16)[name = string("normed_9_cast_fp16")]; tensor var_901_split_sizes_0 = const()[name = string("op_901_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_901_axis_0 = const()[name = string("op_901_axis_0"), val = int32(-1)]; tensor var_901_cast_fp16_0, tensor var_901_cast_fp16_1 = split(axis = var_901_axis_0, split_sizes = var_901_split_sizes_0, x = normed_9_cast_fp16)[name = string("op_901_cast_fp16")]; tensor layers_1_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_1_ffn_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814769920)))]; tensor normed_11_cast_fp16 = mul(x = var_901_cast_fp16_0, y = layers_1_ffn_norm_weight_promoted_to_fp16)[name = string("normed_11_cast_fp16")]; tensor var_907 = const()[name = string("op_907"), val = tensor([0, 2, 1])]; tensor var_910_axes_0 = const()[name = string("op_910_axes_0"), val = tensor([2])]; tensor var_908_cast_fp16 = transpose(perm = var_907, x = normed_11_cast_fp16)[name = string("transpose_155")]; tensor var_910_cast_fp16 = expand_dims(axes = var_910_axes_0, x = var_908_cast_fp16)[name = string("op_910_cast_fp16")]; string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = layers_1_feed_forward_w1_weight, x = var_910_cast_fp16)[name = string("input_25")]; string b_3_pad_type_0 = const()[name = string("b_3_pad_type_0"), val = string("valid")]; tensor b_3_strides_0 = const()[name = string("b_3_strides_0"), val = tensor([1, 1])]; tensor b_3_pad_0 = const()[name = string("b_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_3_dilations_0 = const()[name = string("b_3_dilations_0"), val = tensor([1, 1])]; int32 b_3_groups_0 = const()[name = string("b_3_groups_0"), val = int32(1)]; tensor b_3 = conv(dilations = b_3_dilations_0, groups = b_3_groups_0, pad = b_3_pad_0, pad_type = b_3_pad_type_0, strides = b_3_strides_0, weight = layers_1_feed_forward_w3_weight, x = var_910_cast_fp16)[name = string("b_3")]; tensor var_938 = silu(x = input_25)[name = string("op_938")]; tensor input_27 = mul(x = var_938, y = b_3)[name = string("input_27")]; string mlp_5_pad_type_0 = const()[name = string("mlp_5_pad_type_0"), val = string("valid")]; tensor mlp_5_strides_0 = const()[name = string("mlp_5_strides_0"), val = tensor([1, 1])]; tensor mlp_5_pad_0 = const()[name = string("mlp_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_5_dilations_0 = const()[name = string("mlp_5_dilations_0"), val = tensor([1, 1])]; int32 mlp_5_groups_0 = const()[name = string("mlp_5_groups_0"), val = int32(1)]; tensor mlp_5 = conv(dilations = mlp_5_dilations_0, groups = mlp_5_groups_0, pad = mlp_5_pad_0, pad_type = mlp_5_pad_type_0, strides = mlp_5_strides_0, weight = layers_1_feed_forward_w2_weight, x = input_27)[name = string("mlp_5")]; tensor var_952_axes_0 = const()[name = string("op_952_axes_0"), val = tensor([2])]; tensor var_952 = squeeze(axes = var_952_axes_0, x = mlp_5)[name = string("op_952")]; tensor var_956 = const()[name = string("op_956"), val = tensor([0, 2, 1])]; tensor mlp_7 = transpose(perm = var_956, x = var_952)[name = string("transpose_154")]; tensor x_9_cast_fp16 = add(x = x_7_cast_fp16, y = mlp_7)[name = string("x_9_cast_fp16")]; fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_960_cast_fp16 = mul(x = x_9_cast_fp16, y = const_4_promoted_to_fp16)[name = string("op_960_cast_fp16")]; int32 var_962 = const()[name = string("op_962"), val = int32(-1)]; bool input_29_interleave_0 = const()[name = string("input_29_interleave_0"), val = bool(false)]; tensor input_29_cast_fp16 = concat(axis = var_962, interleave = input_29_interleave_0, values = (x_9_cast_fp16, var_960_cast_fp16))[name = string("input_29_cast_fp16")]; tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; fp16 var_968_to_fp16 = const()[name = string("op_968_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_968_to_fp16, x = input_29_cast_fp16)[name = string("normed_13_cast_fp16")]; tensor var_971_split_sizes_0 = const()[name = string("op_971_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_971_axis_0 = const()[name = string("op_971_axis_0"), val = int32(-1)]; tensor var_971_cast_fp16_0, tensor var_971_cast_fp16_1 = split(axis = var_971_axis_0, split_sizes = var_971_split_sizes_0, x = normed_13_cast_fp16)[name = string("op_971_cast_fp16")]; tensor layers_2_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_2_operator_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814772032)))]; tensor hidden_states_5_cast_fp16 = mul(x = var_971_cast_fp16_0, y = layers_2_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor var_977 = const()[name = string("op_977"), val = tensor([0, 2, 1])]; tensor var_980_axes_0 = const()[name = string("op_980_axes_0"), val = tensor([2])]; tensor var_978_cast_fp16 = transpose(perm = var_977, x = hidden_states_5_cast_fp16)[name = string("transpose_153")]; tensor var_980_cast_fp16 = expand_dims(axes = var_980_axes_0, x = var_978_cast_fp16)[name = string("op_980_cast_fp16")]; string var_996_pad_type_0 = const()[name = string("op_996_pad_type_0"), val = string("valid")]; tensor var_996_strides_0 = const()[name = string("op_996_strides_0"), val = tensor([1, 1])]; tensor var_996_pad_0 = const()[name = string("op_996_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_996_dilations_0 = const()[name = string("op_996_dilations_0"), val = tensor([1, 1])]; int32 var_996_groups_0 = const()[name = string("op_996_groups_0"), val = int32(1)]; tensor var_996 = conv(dilations = var_996_dilations_0, groups = var_996_groups_0, pad = var_996_pad_0, pad_type = var_996_pad_type_0, strides = var_996_strides_0, weight = layers_2_self_attn_q_proj_weight, x = var_980_cast_fp16)[name = string("op_996")]; tensor var_1001 = const()[name = string("op_1001"), val = tensor([1, 16, 64, 1])]; tensor var_1002 = reshape(shape = var_1001, x = var_996)[name = string("op_1002")]; tensor var_1007 = const()[name = string("op_1007"), val = tensor([0, 1, 3, 2])]; string var_1024_pad_type_0 = const()[name = string("op_1024_pad_type_0"), val = string("valid")]; tensor var_1024_strides_0 = const()[name = string("op_1024_strides_0"), val = tensor([1, 1])]; tensor var_1024_pad_0 = const()[name = string("op_1024_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1024_dilations_0 = const()[name = string("op_1024_dilations_0"), val = tensor([1, 1])]; int32 var_1024_groups_0 = const()[name = string("op_1024_groups_0"), val = int32(1)]; tensor var_1024 = conv(dilations = var_1024_dilations_0, groups = var_1024_groups_0, pad = var_1024_pad_0, pad_type = var_1024_pad_type_0, strides = var_1024_strides_0, weight = layers_2_self_attn_k_proj_weight, x = var_980_cast_fp16)[name = string("op_1024")]; tensor var_1029 = const()[name = string("op_1029"), val = tensor([1, 8, 64, 1])]; tensor var_1030 = reshape(shape = var_1029, x = var_1024)[name = string("op_1030")]; tensor var_1035 = const()[name = string("op_1035"), val = tensor([0, 1, 3, 2])]; string var_1052_pad_type_0 = const()[name = string("op_1052_pad_type_0"), val = string("valid")]; tensor var_1052_strides_0 = const()[name = string("op_1052_strides_0"), val = tensor([1, 1])]; tensor var_1052_pad_0 = const()[name = string("op_1052_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1052_dilations_0 = const()[name = string("op_1052_dilations_0"), val = tensor([1, 1])]; int32 var_1052_groups_0 = const()[name = string("op_1052_groups_0"), val = int32(1)]; tensor var_1052 = conv(dilations = var_1052_dilations_0, groups = var_1052_groups_0, pad = var_1052_pad_0, pad_type = var_1052_pad_type_0, strides = var_1052_strides_0, weight = layers_2_self_attn_v_proj_weight, x = var_980_cast_fp16)[name = string("op_1052")]; tensor var_1057 = const()[name = string("op_1057"), val = tensor([1, 8, 64, 1])]; tensor var_1058 = reshape(shape = var_1057, x = var_1052)[name = string("op_1058")]; tensor var_1063 = const()[name = string("op_1063"), val = tensor([0, 1, 3, 2])]; fp16 const_5_promoted = const()[name = string("const_5_promoted"), val = fp16(-0x1p+0)]; tensor var_1008 = transpose(perm = var_1007, x = var_1002)[name = string("transpose_152")]; tensor var_1070 = mul(x = var_1008, y = const_5_promoted)[name = string("op_1070")]; int32 var_1072 = const()[name = string("op_1072"), val = int32(-1)]; bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; tensor input_33 = concat(axis = var_1072, interleave = input_33_interleave_0, values = (var_1008, var_1070))[name = string("input_33")]; tensor normed_15_axes_0 = const()[name = string("normed_15_axes_0"), val = tensor([-1])]; fp16 var_1078_to_fp16 = const()[name = string("op_1078_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_15_cast_fp16 = layer_norm(axes = normed_15_axes_0, epsilon = var_1078_to_fp16, x = input_33)[name = string("normed_15_cast_fp16")]; tensor var_1081_split_sizes_0 = const()[name = string("op_1081_split_sizes_0"), val = tensor([64, 64])]; int32 var_1081_axis_0 = const()[name = string("op_1081_axis_0"), val = int32(-1)]; tensor var_1081_0, tensor var_1081_1 = split(axis = var_1081_axis_0, split_sizes = var_1081_split_sizes_0, x = normed_15_cast_fp16)[name = string("op_1081")]; tensor q_1 = mul(x = var_1081_0, y = layers_2_self_attn_q_layernorm_weight)[name = string("q_1")]; fp16 const_6_promoted = const()[name = string("const_6_promoted"), val = fp16(-0x1p+0)]; tensor var_1036 = transpose(perm = var_1035, x = var_1030)[name = string("transpose_151")]; tensor var_1084 = mul(x = var_1036, y = const_6_promoted)[name = string("op_1084")]; int32 var_1086 = const()[name = string("op_1086"), val = int32(-1)]; bool input_35_interleave_0 = const()[name = string("input_35_interleave_0"), val = bool(false)]; tensor input_35 = concat(axis = var_1086, interleave = input_35_interleave_0, values = (var_1036, var_1084))[name = string("input_35")]; tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; fp16 var_1092_to_fp16 = const()[name = string("op_1092_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_1092_to_fp16, x = input_35)[name = string("normed_17_cast_fp16")]; tensor var_1095_split_sizes_0 = const()[name = string("op_1095_split_sizes_0"), val = tensor([64, 64])]; int32 var_1095_axis_0 = const()[name = string("op_1095_axis_0"), val = int32(-1)]; tensor var_1095_0, tensor var_1095_1 = split(axis = var_1095_axis_0, split_sizes = var_1095_split_sizes_0, x = normed_17_cast_fp16)[name = string("op_1095")]; tensor k_1 = mul(x = var_1095_0, y = layers_2_self_attn_k_layernorm_weight)[name = string("k_1")]; tensor var_1098 = mul(x = q_1, y = cos)[name = string("op_1098")]; tensor var_1099_split_sizes_0 = const()[name = string("op_1099_split_sizes_0"), val = tensor([32, 32])]; int32 var_1099_axis_0 = const()[name = string("op_1099_axis_0"), val = int32(-1)]; tensor var_1099_0, tensor var_1099_1 = split(axis = var_1099_axis_0, split_sizes = var_1099_split_sizes_0, x = q_1)[name = string("op_1099")]; fp16 const_7_promoted = const()[name = string("const_7_promoted"), val = fp16(-0x1p+0)]; tensor var_1101 = mul(x = var_1099_1, y = const_7_promoted)[name = string("op_1101")]; int32 var_1103 = const()[name = string("op_1103"), val = int32(-1)]; bool var_1104_interleave_0 = const()[name = string("op_1104_interleave_0"), val = bool(false)]; tensor var_1104 = concat(axis = var_1103, interleave = var_1104_interleave_0, values = (var_1101, var_1099_0))[name = string("op_1104")]; tensor var_1105 = mul(x = var_1104, y = sin)[name = string("op_1105")]; tensor q_3 = add(x = var_1098, y = var_1105)[name = string("q_3")]; tensor var_1108 = mul(x = k_1, y = cos)[name = string("op_1108")]; tensor var_1109_split_sizes_0 = const()[name = string("op_1109_split_sizes_0"), val = tensor([32, 32])]; int32 var_1109_axis_0 = const()[name = string("op_1109_axis_0"), val = int32(-1)]; tensor var_1109_0, tensor var_1109_1 = split(axis = var_1109_axis_0, split_sizes = var_1109_split_sizes_0, x = k_1)[name = string("op_1109")]; fp16 const_8_promoted = const()[name = string("const_8_promoted"), val = fp16(-0x1p+0)]; tensor var_1111 = mul(x = var_1109_1, y = const_8_promoted)[name = string("op_1111")]; int32 var_1113 = const()[name = string("op_1113"), val = int32(-1)]; bool var_1114_interleave_0 = const()[name = string("op_1114_interleave_0"), val = bool(false)]; tensor var_1114 = concat(axis = var_1113, interleave = var_1114_interleave_0, values = (var_1111, var_1109_0))[name = string("op_1114")]; tensor var_1115 = mul(x = var_1114, y = sin)[name = string("op_1115")]; tensor k_3 = add(x = var_1108, y = var_1115)[name = string("k_3")]; tensor read_state_0 = read_state(input = kv_cache_0)[name = string("read_state_0")]; tensor var_1120_begin_0 = const()[name = string("op_1120_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1120_end_0 = const()[name = string("op_1120_end_0"), val = tensor([1, 8, 2048, 64])]; tensor var_1120_end_mask_0 = const()[name = string("op_1120_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1120_squeeze_mask_0 = const()[name = string("op_1120_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_1120_cast_fp16 = slice_by_index(begin = var_1120_begin_0, end = var_1120_end_0, end_mask = var_1120_end_mask_0, squeeze_mask = var_1120_squeeze_mask_0, x = read_state_0)[name = string("op_1120_cast_fp16")]; tensor K_cache_1_axes_0 = const()[name = string("K_cache_1_axes_0"), val = tensor([0])]; tensor K_cache_1_cast_fp16 = expand_dims(axes = K_cache_1_axes_0, x = var_1120_cast_fp16)[name = string("K_cache_1_cast_fp16")]; tensor var_1125_begin_0 = const()[name = string("op_1125_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_1125_end_0 = const()[name = string("op_1125_end_0"), val = tensor([7, 8, 2048, 64])]; tensor var_1125_end_mask_0 = const()[name = string("op_1125_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1125_squeeze_mask_0 = const()[name = string("op_1125_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_1125_cast_fp16 = slice_by_index(begin = var_1125_begin_0, end = var_1125_end_0, end_mask = var_1125_end_mask_0, squeeze_mask = var_1125_squeeze_mask_0, x = read_state_0)[name = string("op_1125_cast_fp16")]; tensor V_cache_1_axes_0 = const()[name = string("V_cache_1_axes_0"), val = tensor([0])]; tensor V_cache_1_cast_fp16 = expand_dims(axes = V_cache_1_axes_0, x = var_1125_cast_fp16)[name = string("V_cache_1_cast_fp16")]; tensor k_b_1_reps_0 = const()[name = string("k_b_1_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_b_1 = tile(reps = k_b_1_reps_0, x = k_3)[name = string("k_b_1")]; tensor v_b_1_reps_0 = const()[name = string("v_b_1_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor var_1064 = transpose(perm = var_1063, x = var_1058)[name = string("transpose_150")]; tensor v_b_1 = tile(reps = v_b_1_reps_0, x = var_1064)[name = string("v_b_1")]; fp16 var_1130_promoted_to_fp16 = const()[name = string("op_1130_promoted_to_fp16"), val = fp16(0x1p+0)]; tensor var_1132_cast_fp16 = sub(x = var_1130_promoted_to_fp16, y = update_mask)[name = string("op_1132_cast_fp16")]; tensor var_1133_cast_fp16 = mul(x = K_cache_1_cast_fp16, y = var_1132_cast_fp16)[name = string("op_1133_cast_fp16")]; tensor var_1134_cast_fp16 = mul(x = k_b_1, y = update_mask)[name = string("op_1134_cast_fp16")]; tensor K_new_1_cast_fp16 = add(x = var_1133_cast_fp16, y = var_1134_cast_fp16)[name = string("K_new_1_cast_fp16")]; tensor var_1140_cast_fp16 = mul(x = V_cache_1_cast_fp16, y = var_1132_cast_fp16)[name = string("op_1140_cast_fp16")]; tensor var_1141_cast_fp16 = mul(x = v_b_1, y = update_mask)[name = string("op_1141_cast_fp16")]; tensor V_new_1_cast_fp16 = add(x = var_1140_cast_fp16, y = var_1141_cast_fp16)[name = string("V_new_1_cast_fp16")]; tensor var_1145_axes_0 = const()[name = string("op_1145_axes_0"), val = tensor([0])]; tensor var_1145_cast_fp16 = squeeze(axes = var_1145_axes_0, x = K_new_1_cast_fp16)[name = string("op_1145_cast_fp16")]; tensor concat_0 = const()[name = string("concat_0"), val = tensor([0, 0, 0, 0])]; tensor concat_1 = const()[name = string("concat_1"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_1_stride_0, update = var_1145_cast_fp16, x = read_state_0)[name = string("kv_cache_0_internal_tensor_assign_1_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_1_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_12_write_state")]; tensor coreml_update_state_12 = read_state(input = kv_cache_0)[name = string("coreml_update_state_12")]; tensor var_1152_axes_0 = const()[name = string("op_1152_axes_0"), val = tensor([0])]; tensor var_1152_cast_fp16 = squeeze(axes = var_1152_axes_0, x = V_new_1_cast_fp16)[name = string("op_1152_cast_fp16")]; tensor concat_2 = const()[name = string("concat_2"), val = tensor([6, 0, 0, 0])]; tensor concat_3 = const()[name = string("concat_3"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_2, begin_mask = kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_3, end_mask = kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_2_stride_0, update = var_1152_cast_fp16, x = coreml_update_state_12)[name = string("kv_cache_0_internal_tensor_assign_2_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_2_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_13_write_state")]; tensor coreml_update_state_13 = read_state(input = kv_cache_0)[name = string("coreml_update_state_13")]; tensor transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_0_reps_0 = const()[name = string("tile_0_reps_0"), val = tensor([2, 1, 1, 1])]; tensor transpose_0_cast_fp16 = transpose(perm = transpose_0_perm_0, x = K_new_1_cast_fp16)[name = string("transpose_149")]; tensor tile_0_cast_fp16 = tile(reps = tile_0_reps_0, x = transpose_0_cast_fp16)[name = string("tile_0_cast_fp16")]; tensor concat_4 = const()[name = string("concat_4"), val = tensor([2, 8, 1, 2048, 64])]; tensor reshape_0_cast_fp16 = reshape(shape = concat_4, x = tile_0_cast_fp16)[name = string("reshape_0_cast_fp16")]; tensor transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_5 = const()[name = string("concat_5"), val = tensor([-1, 1, 2048, 64])]; tensor transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = reshape_0_cast_fp16)[name = string("transpose_148")]; tensor reshape_1_cast_fp16 = reshape(shape = concat_5, x = transpose_1_cast_fp16)[name = string("reshape_1_cast_fp16")]; tensor transpose_24_perm_0 = const()[name = string("transpose_24_perm_0"), val = tensor([1, 0, 3, 2])]; tensor transpose_2_perm_0 = const()[name = string("transpose_2_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_1_reps_0 = const()[name = string("tile_1_reps_0"), val = tensor([2, 1, 1, 1])]; tensor transpose_2_cast_fp16 = transpose(perm = transpose_2_perm_0, x = V_new_1_cast_fp16)[name = string("transpose_146")]; tensor tile_1_cast_fp16 = tile(reps = tile_1_reps_0, x = transpose_2_cast_fp16)[name = string("tile_1_cast_fp16")]; tensor concat_6 = const()[name = string("concat_6"), val = tensor([2, 8, 1, 2048, 64])]; tensor reshape_2_cast_fp16 = reshape(shape = concat_6, x = tile_1_cast_fp16)[name = string("reshape_2_cast_fp16")]; tensor transpose_3_perm_0 = const()[name = string("transpose_3_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_7 = const()[name = string("concat_7"), val = tensor([-1, 1, 2048, 64])]; tensor transpose_3_cast_fp16 = transpose(perm = transpose_3_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_145")]; tensor reshape_3_cast_fp16 = reshape(shape = concat_7, x = transpose_3_cast_fp16)[name = string("reshape_3_cast_fp16")]; tensor V_e_1_perm_0 = const()[name = string("V_e_1_perm_0"), val = tensor([1, 0, 2, 3])]; bool var_1179_transpose_x_0 = const()[name = string("op_1179_transpose_x_0"), val = bool(false)]; bool var_1179_transpose_y_0 = const()[name = string("op_1179_transpose_y_0"), val = bool(false)]; tensor transpose_24_cast_fp16 = transpose(perm = transpose_24_perm_0, x = reshape_1_cast_fp16)[name = string("transpose_147")]; tensor var_1179_cast_fp16 = matmul(transpose_x = var_1179_transpose_x_0, transpose_y = var_1179_transpose_y_0, x = q_3, y = transpose_24_cast_fp16)[name = string("op_1179_cast_fp16")]; fp16 var_1180_to_fp16 = const()[name = string("op_1180_to_fp16"), val = fp16(0x1p-3)]; tensor attn_1_cast_fp16 = mul(x = var_1179_cast_fp16, y = var_1180_to_fp16)[name = string("attn_1_cast_fp16")]; tensor attn_3_cast_fp16 = add(x = attn_1_cast_fp16, y = causal_mask)[name = string("attn_3_cast_fp16")]; int32 var_1189 = const()[name = string("op_1189"), val = int32(-1)]; tensor var_1191_cast_fp16 = softmax(axis = var_1189, x = attn_3_cast_fp16)[name = string("op_1191_cast_fp16")]; bool var_1207_transpose_x_0 = const()[name = string("op_1207_transpose_x_0"), val = bool(false)]; bool var_1207_transpose_y_0 = const()[name = string("op_1207_transpose_y_0"), val = bool(false)]; tensor V_e_1_cast_fp16 = transpose(perm = V_e_1_perm_0, x = reshape_3_cast_fp16)[name = string("transpose_144")]; tensor var_1207_cast_fp16 = matmul(transpose_x = var_1207_transpose_x_0, transpose_y = var_1207_transpose_y_0, x = var_1191_cast_fp16, y = V_e_1_cast_fp16)[name = string("op_1207_cast_fp16")]; tensor var_1217 = const()[name = string("op_1217"), val = tensor([0, 2, 1, 3])]; tensor var_1224 = const()[name = string("op_1224"), val = tensor([1, 1, -1])]; tensor var_1218 = transpose(perm = var_1217, x = var_1207_cast_fp16)[name = string("transpose_143")]; tensor out_3 = reshape(shape = var_1224, x = var_1218)[name = string("out_3")]; tensor var_1229 = const()[name = string("op_1229"), val = tensor([0, 2, 1])]; tensor squeeze_0 = const()[name = string("squeeze_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814774144)))]; string var_1245_pad_type_0 = const()[name = string("op_1245_pad_type_0"), val = string("valid")]; int32 var_1245_groups_0 = const()[name = string("op_1245_groups_0"), val = int32(1)]; tensor var_1245_strides_0 = const()[name = string("op_1245_strides_0"), val = tensor([1])]; tensor var_1245_pad_0 = const()[name = string("op_1245_pad_0"), val = tensor([0, 0])]; tensor var_1245_dilations_0 = const()[name = string("op_1245_dilations_0"), val = tensor([1])]; tensor var_1230 = transpose(perm = var_1229, x = out_3)[name = string("transpose_142")]; tensor var_1245 = conv(dilations = var_1245_dilations_0, groups = var_1245_groups_0, pad = var_1245_pad_0, pad_type = var_1245_pad_type_0, strides = var_1245_strides_0, weight = squeeze_0, x = var_1230)[name = string("op_1245")]; tensor var_1249 = const()[name = string("op_1249"), val = tensor([0, 2, 1])]; tensor op_out_5 = transpose(perm = var_1249, x = var_1245)[name = string("transpose_141")]; tensor x_15_cast_fp16 = add(x = x_9_cast_fp16, y = op_out_5)[name = string("x_15_cast_fp16")]; fp16 const_9_promoted_to_fp16 = const()[name = string("const_9_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1253_cast_fp16 = mul(x = x_15_cast_fp16, y = const_9_promoted_to_fp16)[name = string("op_1253_cast_fp16")]; int32 var_1255 = const()[name = string("op_1255"), val = int32(-1)]; bool input_39_interleave_0 = const()[name = string("input_39_interleave_0"), val = bool(false)]; tensor input_39_cast_fp16 = concat(axis = var_1255, interleave = input_39_interleave_0, values = (x_15_cast_fp16, var_1253_cast_fp16))[name = string("input_39_cast_fp16")]; tensor normed_19_axes_0 = const()[name = string("normed_19_axes_0"), val = tensor([-1])]; fp16 var_1261_to_fp16 = const()[name = string("op_1261_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_19_cast_fp16 = layer_norm(axes = normed_19_axes_0, epsilon = var_1261_to_fp16, x = input_39_cast_fp16)[name = string("normed_19_cast_fp16")]; tensor var_1264_split_sizes_0 = const()[name = string("op_1264_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1264_axis_0 = const()[name = string("op_1264_axis_0"), val = int32(-1)]; tensor var_1264_cast_fp16_0, tensor var_1264_cast_fp16_1 = split(axis = var_1264_axis_0, split_sizes = var_1264_split_sizes_0, x = normed_19_cast_fp16)[name = string("op_1264_cast_fp16")]; tensor layers_2_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_2_ffn_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816871360)))]; tensor normed_21_cast_fp16 = mul(x = var_1264_cast_fp16_0, y = layers_2_ffn_norm_weight_promoted_to_fp16)[name = string("normed_21_cast_fp16")]; tensor var_1270 = const()[name = string("op_1270"), val = tensor([0, 2, 1])]; tensor var_1273_axes_0 = const()[name = string("op_1273_axes_0"), val = tensor([2])]; tensor var_1271_cast_fp16 = transpose(perm = var_1270, x = normed_21_cast_fp16)[name = string("transpose_140")]; tensor var_1273_cast_fp16 = expand_dims(axes = var_1273_axes_0, x = var_1271_cast_fp16)[name = string("op_1273_cast_fp16")]; string input_43_pad_type_0 = const()[name = string("input_43_pad_type_0"), val = string("valid")]; tensor input_43_strides_0 = const()[name = string("input_43_strides_0"), val = tensor([1, 1])]; tensor input_43_pad_0 = const()[name = string("input_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_43_dilations_0 = const()[name = string("input_43_dilations_0"), val = tensor([1, 1])]; int32 input_43_groups_0 = const()[name = string("input_43_groups_0"), val = int32(1)]; tensor input_43 = conv(dilations = input_43_dilations_0, groups = input_43_groups_0, pad = input_43_pad_0, pad_type = input_43_pad_type_0, strides = input_43_strides_0, weight = layers_2_feed_forward_w1_weight, x = var_1273_cast_fp16)[name = string("input_43")]; string b_5_pad_type_0 = const()[name = string("b_5_pad_type_0"), val = string("valid")]; tensor b_5_strides_0 = const()[name = string("b_5_strides_0"), val = tensor([1, 1])]; tensor b_5_pad_0 = const()[name = string("b_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_5_dilations_0 = const()[name = string("b_5_dilations_0"), val = tensor([1, 1])]; int32 b_5_groups_0 = const()[name = string("b_5_groups_0"), val = int32(1)]; tensor b_5 = conv(dilations = b_5_dilations_0, groups = b_5_groups_0, pad = b_5_pad_0, pad_type = b_5_pad_type_0, strides = b_5_strides_0, weight = layers_2_feed_forward_w3_weight, x = var_1273_cast_fp16)[name = string("b_5")]; tensor var_1301 = silu(x = input_43)[name = string("op_1301")]; tensor input_45 = mul(x = var_1301, y = b_5)[name = string("input_45")]; string mlp_9_pad_type_0 = const()[name = string("mlp_9_pad_type_0"), val = string("valid")]; tensor mlp_9_strides_0 = const()[name = string("mlp_9_strides_0"), val = tensor([1, 1])]; tensor mlp_9_pad_0 = const()[name = string("mlp_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_9_dilations_0 = const()[name = string("mlp_9_dilations_0"), val = tensor([1, 1])]; int32 mlp_9_groups_0 = const()[name = string("mlp_9_groups_0"), val = int32(1)]; tensor mlp_9 = conv(dilations = mlp_9_dilations_0, groups = mlp_9_groups_0, pad = mlp_9_pad_0, pad_type = mlp_9_pad_type_0, strides = mlp_9_strides_0, weight = layers_2_feed_forward_w2_weight, x = input_45)[name = string("mlp_9")]; tensor var_1315_axes_0 = const()[name = string("op_1315_axes_0"), val = tensor([2])]; tensor var_1315 = squeeze(axes = var_1315_axes_0, x = mlp_9)[name = string("op_1315")]; tensor var_1319 = const()[name = string("op_1319"), val = tensor([0, 2, 1])]; tensor mlp_11 = transpose(perm = var_1319, x = var_1315)[name = string("transpose_139")]; tensor x_17_cast_fp16 = add(x = x_15_cast_fp16, y = mlp_11)[name = string("x_17_cast_fp16")]; fp16 const_10_promoted_to_fp16 = const()[name = string("const_10_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1323_cast_fp16 = mul(x = x_17_cast_fp16, y = const_10_promoted_to_fp16)[name = string("op_1323_cast_fp16")]; int32 var_1325 = const()[name = string("op_1325"), val = int32(-1)]; bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; tensor input_47_cast_fp16 = concat(axis = var_1325, interleave = input_47_interleave_0, values = (x_17_cast_fp16, var_1323_cast_fp16))[name = string("input_47_cast_fp16")]; tensor normed_23_axes_0 = const()[name = string("normed_23_axes_0"), val = tensor([-1])]; fp16 var_1331_to_fp16 = const()[name = string("op_1331_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_23_cast_fp16 = layer_norm(axes = normed_23_axes_0, epsilon = var_1331_to_fp16, x = input_47_cast_fp16)[name = string("normed_23_cast_fp16")]; tensor var_1334_split_sizes_0 = const()[name = string("op_1334_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1334_axis_0 = const()[name = string("op_1334_axis_0"), val = int32(-1)]; tensor var_1334_cast_fp16_0, tensor var_1334_cast_fp16_1 = split(axis = var_1334_axis_0, split_sizes = var_1334_split_sizes_0, x = normed_23_cast_fp16)[name = string("op_1334_cast_fp16")]; tensor layers_3_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_3_operator_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816873472)))]; tensor hidden_states_7_cast_fp16 = mul(x = var_1334_cast_fp16_0, y = layers_3_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_7_cast_fp16")]; tensor var_1340 = const()[name = string("op_1340"), val = tensor([0, 2, 1])]; tensor var_1343_axes_0 = const()[name = string("op_1343_axes_0"), val = tensor([2])]; tensor var_1341_cast_fp16 = transpose(perm = var_1340, x = hidden_states_7_cast_fp16)[name = string("transpose_138")]; tensor var_1343_cast_fp16 = expand_dims(axes = var_1343_axes_0, x = var_1341_cast_fp16)[name = string("op_1343_cast_fp16")]; string BCx_5_pad_type_0 = const()[name = string("BCx_5_pad_type_0"), val = string("valid")]; tensor BCx_5_strides_0 = const()[name = string("BCx_5_strides_0"), val = tensor([1, 1])]; tensor BCx_5_pad_0 = const()[name = string("BCx_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor BCx_5_dilations_0 = const()[name = string("BCx_5_dilations_0"), val = tensor([1, 1])]; int32 BCx_5_groups_0 = const()[name = string("BCx_5_groups_0"), val = int32(1)]; tensor BCx_5 = conv(dilations = BCx_5_dilations_0, groups = BCx_5_groups_0, pad = BCx_5_pad_0, pad_type = BCx_5_pad_type_0, strides = BCx_5_strides_0, weight = layers_3_conv_in_proj_weight, x = var_1343_cast_fp16)[name = string("BCx_5")]; tensor var_1360_split_sizes_0 = const()[name = string("op_1360_split_sizes_0"), val = tensor([1024, 1024, 1024])]; int32 var_1360_axis_0 = const()[name = string("op_1360_axis_0"), val = int32(1)]; tensor var_1360_0, tensor var_1360_1, tensor var_1360_2 = split(axis = var_1360_axis_0, split_sizes = var_1360_split_sizes_0, x = BCx_5)[name = string("op_1360")]; tensor Bx_5 = mul(x = var_1360_0, y = var_1360_2)[name = string("Bx_5")]; tensor var_1366_begin_0 = const()[name = string("op_1366_begin_0"), val = tensor([2, 0, 0])]; tensor var_1366_end_0 = const()[name = string("op_1366_end_0"), val = tensor([3, 1024, 3])]; tensor var_1366_end_mask_0 = const()[name = string("op_1366_end_mask_0"), val = tensor([false, true, true])]; tensor var_1366_squeeze_mask_0 = const()[name = string("op_1366_squeeze_mask_0"), val = tensor([true, false, false])]; tensor var_1366_cast_fp16 = slice_by_index(begin = var_1366_begin_0, end = var_1366_end_0, end_mask = var_1366_end_mask_0, squeeze_mask = var_1366_squeeze_mask_0, x = conv_state_in)[name = string("op_1366_cast_fp16")]; tensor var_1368_axes_0 = const()[name = string("op_1368_axes_0"), val = tensor([0])]; tensor var_1368_cast_fp16 = expand_dims(axes = var_1368_axes_0, x = var_1366_cast_fp16)[name = string("op_1368_cast_fp16")]; tensor slot_5_axes_0 = const()[name = string("slot_5_axes_0"), val = tensor([2])]; tensor slot_5_cast_fp16 = expand_dims(axes = slot_5_axes_0, x = var_1368_cast_fp16)[name = string("slot_5_cast_fp16")]; tensor live_tail_5_begin_0 = const()[name = string("live_tail_5_begin_0"), val = tensor([0, 0, 0, 1])]; tensor live_tail_5_end_0 = const()[name = string("live_tail_5_end_0"), val = tensor([1, 1024, 1, 1])]; tensor live_tail_5_end_mask_0 = const()[name = string("live_tail_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor live_tail_5_cast_fp16 = slice_by_index(begin = live_tail_5_begin_0, end = live_tail_5_end_0, end_mask = live_tail_5_end_mask_0, x = slot_5_cast_fp16)[name = string("live_tail_5_cast_fp16")]; int32 var_1377 = const()[name = string("op_1377"), val = int32(-1)]; bool new_state_5_interleave_0 = const()[name = string("new_state_5_interleave_0"), val = bool(false)]; tensor new_state_5_cast_fp16 = concat(axis = var_1377, interleave = new_state_5_interleave_0, values = (live_tail_5_cast_fp16, Bx_5))[name = string("new_state_5_cast_fp16")]; tensor var_1380_axes_0 = const()[name = string("op_1380_axes_0"), val = tensor([0])]; tensor var_1380_cast_fp16 = squeeze(axes = var_1380_axes_0, x = new_state_5_cast_fp16)[name = string("op_1380_cast_fp16")]; tensor var_1382_axes_0 = const()[name = string("op_1382_axes_0"), val = tensor([1])]; tensor var_1382_cast_fp16 = squeeze(axes = var_1382_axes_0, x = var_1380_cast_fp16)[name = string("op_1382_cast_fp16")]; string conv_out_5_pad_type_0 = const()[name = string("conv_out_5_pad_type_0"), val = string("valid")]; int32 conv_out_5_groups_0 = const()[name = string("conv_out_5_groups_0"), val = int32(1024)]; tensor conv_out_5_strides_0 = const()[name = string("conv_out_5_strides_0"), val = tensor([1, 1])]; tensor conv_out_5_pad_0 = const()[name = string("conv_out_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor conv_out_5_dilations_0 = const()[name = string("conv_out_5_dilations_0"), val = tensor([1, 1])]; tensor layers_3_conv_conv_weight_promoted_to_fp16 = const()[name = string("layers_3_conv_conv_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816875584)))]; tensor conv_out_5_cast_fp16 = conv(dilations = conv_out_5_dilations_0, groups = conv_out_5_groups_0, pad = conv_out_5_pad_0, pad_type = conv_out_5_pad_type_0, strides = conv_out_5_strides_0, weight = layers_3_conv_conv_weight_promoted_to_fp16, x = new_state_5_cast_fp16)[name = string("conv_out_5_cast_fp16")]; tensor input_51_cast_fp16 = mul(x = var_1360_1, y = conv_out_5_cast_fp16)[name = string("input_51_cast_fp16")]; string y_5_pad_type_0 = const()[name = string("y_5_pad_type_0"), val = string("valid")]; tensor y_5_strides_0 = const()[name = string("y_5_strides_0"), val = tensor([1, 1])]; tensor y_5_pad_0 = const()[name = string("y_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor y_5_dilations_0 = const()[name = string("y_5_dilations_0"), val = tensor([1, 1])]; int32 y_5_groups_0 = const()[name = string("y_5_groups_0"), val = int32(1)]; tensor layers_3_conv_out_proj_weight_promoted_to_fp16 = const()[name = string("layers_3_conv_out_proj_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816881792)))]; tensor y_5_cast_fp16 = conv(dilations = y_5_dilations_0, groups = y_5_groups_0, pad = y_5_pad_0, pad_type = y_5_pad_type_0, strides = y_5_strides_0, weight = layers_3_conv_out_proj_weight_promoted_to_fp16, x = input_51_cast_fp16)[name = string("y_5_cast_fp16")]; tensor var_1408_axes_0 = const()[name = string("op_1408_axes_0"), val = tensor([2])]; tensor var_1408_cast_fp16 = squeeze(axes = var_1408_axes_0, x = y_5_cast_fp16)[name = string("op_1408_cast_fp16")]; tensor var_1412 = const()[name = string("op_1412"), val = tensor([0, 2, 1])]; tensor op_out_7_cast_fp16 = transpose(perm = var_1412, x = var_1408_cast_fp16)[name = string("transpose_137")]; tensor x_19_cast_fp16 = add(x = x_17_cast_fp16, y = op_out_7_cast_fp16)[name = string("x_19_cast_fp16")]; fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1416_cast_fp16 = mul(x = x_19_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_1416_cast_fp16")]; int32 var_1418 = const()[name = string("op_1418"), val = int32(-1)]; bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; tensor input_53_cast_fp16 = concat(axis = var_1418, interleave = input_53_interleave_0, values = (x_19_cast_fp16, var_1416_cast_fp16))[name = string("input_53_cast_fp16")]; tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; fp16 var_1424_to_fp16 = const()[name = string("op_1424_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_1424_to_fp16, x = input_53_cast_fp16)[name = string("normed_25_cast_fp16")]; tensor var_1427_split_sizes_0 = const()[name = string("op_1427_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1427_axis_0 = const()[name = string("op_1427_axis_0"), val = int32(-1)]; tensor var_1427_cast_fp16_0, tensor var_1427_cast_fp16_1 = split(axis = var_1427_axis_0, split_sizes = var_1427_split_sizes_0, x = normed_25_cast_fp16)[name = string("op_1427_cast_fp16")]; tensor layers_3_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_3_ffn_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(818979008)))]; tensor normed_27_cast_fp16 = mul(x = var_1427_cast_fp16_0, y = layers_3_ffn_norm_weight_promoted_to_fp16)[name = string("normed_27_cast_fp16")]; tensor var_1433 = const()[name = string("op_1433"), val = tensor([0, 2, 1])]; tensor var_1436_axes_0 = const()[name = string("op_1436_axes_0"), val = tensor([2])]; tensor var_1434_cast_fp16 = transpose(perm = var_1433, x = normed_27_cast_fp16)[name = string("transpose_136")]; tensor var_1436_cast_fp16 = expand_dims(axes = var_1436_axes_0, x = var_1434_cast_fp16)[name = string("op_1436_cast_fp16")]; string input_57_pad_type_0 = const()[name = string("input_57_pad_type_0"), val = string("valid")]; tensor input_57_strides_0 = const()[name = string("input_57_strides_0"), val = tensor([1, 1])]; tensor input_57_pad_0 = const()[name = string("input_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_57_dilations_0 = const()[name = string("input_57_dilations_0"), val = tensor([1, 1])]; int32 input_57_groups_0 = const()[name = string("input_57_groups_0"), val = int32(1)]; tensor input_57 = conv(dilations = input_57_dilations_0, groups = input_57_groups_0, pad = input_57_pad_0, pad_type = input_57_pad_type_0, strides = input_57_strides_0, weight = layers_3_feed_forward_w1_weight, x = var_1436_cast_fp16)[name = string("input_57")]; string b_7_pad_type_0 = const()[name = string("b_7_pad_type_0"), val = string("valid")]; tensor b_7_strides_0 = const()[name = string("b_7_strides_0"), val = tensor([1, 1])]; tensor b_7_pad_0 = const()[name = string("b_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_7_dilations_0 = const()[name = string("b_7_dilations_0"), val = tensor([1, 1])]; int32 b_7_groups_0 = const()[name = string("b_7_groups_0"), val = int32(1)]; tensor b_7 = conv(dilations = b_7_dilations_0, groups = b_7_groups_0, pad = b_7_pad_0, pad_type = b_7_pad_type_0, strides = b_7_strides_0, weight = layers_3_feed_forward_w3_weight, x = var_1436_cast_fp16)[name = string("b_7")]; tensor var_1464 = silu(x = input_57)[name = string("op_1464")]; tensor input_59 = mul(x = var_1464, y = b_7)[name = string("input_59")]; string mlp_13_pad_type_0 = const()[name = string("mlp_13_pad_type_0"), val = string("valid")]; tensor mlp_13_strides_0 = const()[name = string("mlp_13_strides_0"), val = tensor([1, 1])]; tensor mlp_13_pad_0 = const()[name = string("mlp_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_13_dilations_0 = const()[name = string("mlp_13_dilations_0"), val = tensor([1, 1])]; int32 mlp_13_groups_0 = const()[name = string("mlp_13_groups_0"), val = int32(1)]; tensor mlp_13 = conv(dilations = mlp_13_dilations_0, groups = mlp_13_groups_0, pad = mlp_13_pad_0, pad_type = mlp_13_pad_type_0, strides = mlp_13_strides_0, weight = layers_3_feed_forward_w2_weight, x = input_59)[name = string("mlp_13")]; tensor var_1478_axes_0 = const()[name = string("op_1478_axes_0"), val = tensor([2])]; tensor var_1478 = squeeze(axes = var_1478_axes_0, x = mlp_13)[name = string("op_1478")]; tensor var_1482 = const()[name = string("op_1482"), val = tensor([0, 2, 1])]; tensor mlp_15 = transpose(perm = var_1482, x = var_1478)[name = string("transpose_135")]; tensor x_21_cast_fp16 = add(x = x_19_cast_fp16, y = mlp_15)[name = string("x_21_cast_fp16")]; fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1486_cast_fp16 = mul(x = x_21_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_1486_cast_fp16")]; int32 var_1488 = const()[name = string("op_1488"), val = int32(-1)]; bool input_61_interleave_0 = const()[name = string("input_61_interleave_0"), val = bool(false)]; tensor input_61_cast_fp16 = concat(axis = var_1488, interleave = input_61_interleave_0, values = (x_21_cast_fp16, var_1486_cast_fp16))[name = string("input_61_cast_fp16")]; tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; fp16 var_1494_to_fp16 = const()[name = string("op_1494_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_1494_to_fp16, x = input_61_cast_fp16)[name = string("normed_29_cast_fp16")]; tensor var_1497_split_sizes_0 = const()[name = string("op_1497_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1497_axis_0 = const()[name = string("op_1497_axis_0"), val = int32(-1)]; tensor var_1497_cast_fp16_0, tensor var_1497_cast_fp16_1 = split(axis = var_1497_axis_0, split_sizes = var_1497_split_sizes_0, x = normed_29_cast_fp16)[name = string("op_1497_cast_fp16")]; tensor layers_4_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_4_operator_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(818981120)))]; tensor hidden_states_9_cast_fp16 = mul(x = var_1497_cast_fp16_0, y = layers_4_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_9_cast_fp16")]; tensor var_1503 = const()[name = string("op_1503"), val = tensor([0, 2, 1])]; tensor var_1506_axes_0 = const()[name = string("op_1506_axes_0"), val = tensor([2])]; tensor var_1504_cast_fp16 = transpose(perm = var_1503, x = hidden_states_9_cast_fp16)[name = string("transpose_134")]; tensor var_1506_cast_fp16 = expand_dims(axes = var_1506_axes_0, x = var_1504_cast_fp16)[name = string("op_1506_cast_fp16")]; string BCx_7_pad_type_0 = const()[name = string("BCx_7_pad_type_0"), val = string("valid")]; tensor BCx_7_strides_0 = const()[name = string("BCx_7_strides_0"), val = tensor([1, 1])]; tensor BCx_7_pad_0 = const()[name = string("BCx_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor BCx_7_dilations_0 = const()[name = string("BCx_7_dilations_0"), val = tensor([1, 1])]; int32 BCx_7_groups_0 = const()[name = string("BCx_7_groups_0"), val = int32(1)]; tensor BCx_7 = conv(dilations = BCx_7_dilations_0, groups = BCx_7_groups_0, pad = BCx_7_pad_0, pad_type = BCx_7_pad_type_0, strides = BCx_7_strides_0, weight = layers_4_conv_in_proj_weight, x = var_1506_cast_fp16)[name = string("BCx_7")]; tensor var_1523_split_sizes_0 = const()[name = string("op_1523_split_sizes_0"), val = tensor([1024, 1024, 1024])]; int32 var_1523_axis_0 = const()[name = string("op_1523_axis_0"), val = int32(1)]; tensor var_1523_0, tensor var_1523_1, tensor var_1523_2 = split(axis = var_1523_axis_0, split_sizes = var_1523_split_sizes_0, x = BCx_7)[name = string("op_1523")]; tensor Bx_7 = mul(x = var_1523_0, y = var_1523_2)[name = string("Bx_7")]; tensor var_1529_begin_0 = const()[name = string("op_1529_begin_0"), val = tensor([3, 0, 0])]; tensor var_1529_end_0 = const()[name = string("op_1529_end_0"), val = tensor([4, 1024, 3])]; tensor var_1529_end_mask_0 = const()[name = string("op_1529_end_mask_0"), val = tensor([false, true, true])]; tensor var_1529_squeeze_mask_0 = const()[name = string("op_1529_squeeze_mask_0"), val = tensor([true, false, false])]; tensor var_1529_cast_fp16 = slice_by_index(begin = var_1529_begin_0, end = var_1529_end_0, end_mask = var_1529_end_mask_0, squeeze_mask = var_1529_squeeze_mask_0, x = conv_state_in)[name = string("op_1529_cast_fp16")]; tensor var_1531_axes_0 = const()[name = string("op_1531_axes_0"), val = tensor([0])]; tensor var_1531_cast_fp16 = expand_dims(axes = var_1531_axes_0, x = var_1529_cast_fp16)[name = string("op_1531_cast_fp16")]; tensor slot_7_axes_0 = const()[name = string("slot_7_axes_0"), val = tensor([2])]; tensor slot_7_cast_fp16 = expand_dims(axes = slot_7_axes_0, x = var_1531_cast_fp16)[name = string("slot_7_cast_fp16")]; tensor live_tail_7_begin_0 = const()[name = string("live_tail_7_begin_0"), val = tensor([0, 0, 0, 1])]; tensor live_tail_7_end_0 = const()[name = string("live_tail_7_end_0"), val = tensor([1, 1024, 1, 1])]; tensor live_tail_7_end_mask_0 = const()[name = string("live_tail_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor live_tail_7_cast_fp16 = slice_by_index(begin = live_tail_7_begin_0, end = live_tail_7_end_0, end_mask = live_tail_7_end_mask_0, x = slot_7_cast_fp16)[name = string("live_tail_7_cast_fp16")]; int32 var_1540 = const()[name = string("op_1540"), val = int32(-1)]; bool new_state_7_interleave_0 = const()[name = string("new_state_7_interleave_0"), val = bool(false)]; tensor new_state_7_cast_fp16 = concat(axis = var_1540, interleave = new_state_7_interleave_0, values = (live_tail_7_cast_fp16, Bx_7))[name = string("new_state_7_cast_fp16")]; tensor var_1543_axes_0 = const()[name = string("op_1543_axes_0"), val = tensor([0])]; tensor var_1543_cast_fp16 = squeeze(axes = var_1543_axes_0, x = new_state_7_cast_fp16)[name = string("op_1543_cast_fp16")]; tensor var_1545_axes_0 = const()[name = string("op_1545_axes_0"), val = tensor([1])]; tensor var_1545_cast_fp16 = squeeze(axes = var_1545_axes_0, x = var_1543_cast_fp16)[name = string("op_1545_cast_fp16")]; string conv_out_7_pad_type_0 = const()[name = string("conv_out_7_pad_type_0"), val = string("valid")]; int32 conv_out_7_groups_0 = const()[name = string("conv_out_7_groups_0"), val = int32(1024)]; tensor conv_out_7_strides_0 = const()[name = string("conv_out_7_strides_0"), val = tensor([1, 1])]; tensor conv_out_7_pad_0 = const()[name = string("conv_out_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor conv_out_7_dilations_0 = const()[name = string("conv_out_7_dilations_0"), val = tensor([1, 1])]; tensor layers_4_conv_conv_weight_promoted_to_fp16 = const()[name = string("layers_4_conv_conv_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(818983232)))]; tensor conv_out_7_cast_fp16 = conv(dilations = conv_out_7_dilations_0, groups = conv_out_7_groups_0, pad = conv_out_7_pad_0, pad_type = conv_out_7_pad_type_0, strides = conv_out_7_strides_0, weight = layers_4_conv_conv_weight_promoted_to_fp16, x = new_state_7_cast_fp16)[name = string("conv_out_7_cast_fp16")]; tensor input_65_cast_fp16 = mul(x = var_1523_1, y = conv_out_7_cast_fp16)[name = string("input_65_cast_fp16")]; string y_7_pad_type_0 = const()[name = string("y_7_pad_type_0"), val = string("valid")]; tensor y_7_strides_0 = const()[name = string("y_7_strides_0"), val = tensor([1, 1])]; tensor y_7_pad_0 = const()[name = string("y_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor y_7_dilations_0 = const()[name = string("y_7_dilations_0"), val = tensor([1, 1])]; int32 y_7_groups_0 = const()[name = string("y_7_groups_0"), val = int32(1)]; tensor layers_4_conv_out_proj_weight_promoted_to_fp16 = const()[name = string("layers_4_conv_out_proj_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(818989440)))]; tensor y_7_cast_fp16 = conv(dilations = y_7_dilations_0, groups = y_7_groups_0, pad = y_7_pad_0, pad_type = y_7_pad_type_0, strides = y_7_strides_0, weight = layers_4_conv_out_proj_weight_promoted_to_fp16, x = input_65_cast_fp16)[name = string("y_7_cast_fp16")]; tensor var_1571_axes_0 = const()[name = string("op_1571_axes_0"), val = tensor([2])]; tensor var_1571_cast_fp16 = squeeze(axes = var_1571_axes_0, x = y_7_cast_fp16)[name = string("op_1571_cast_fp16")]; tensor var_1575 = const()[name = string("op_1575"), val = tensor([0, 2, 1])]; tensor op_out_9_cast_fp16 = transpose(perm = var_1575, x = var_1571_cast_fp16)[name = string("transpose_133")]; tensor x_23_cast_fp16 = add(x = x_21_cast_fp16, y = op_out_9_cast_fp16)[name = string("x_23_cast_fp16")]; fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1579_cast_fp16 = mul(x = x_23_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_1579_cast_fp16")]; int32 var_1581 = const()[name = string("op_1581"), val = int32(-1)]; bool input_67_interleave_0 = const()[name = string("input_67_interleave_0"), val = bool(false)]; tensor input_67_cast_fp16 = concat(axis = var_1581, interleave = input_67_interleave_0, values = (x_23_cast_fp16, var_1579_cast_fp16))[name = string("input_67_cast_fp16")]; tensor normed_31_axes_0 = const()[name = string("normed_31_axes_0"), val = tensor([-1])]; fp16 var_1587_to_fp16 = const()[name = string("op_1587_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_31_cast_fp16 = layer_norm(axes = normed_31_axes_0, epsilon = var_1587_to_fp16, x = input_67_cast_fp16)[name = string("normed_31_cast_fp16")]; tensor var_1590_split_sizes_0 = const()[name = string("op_1590_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1590_axis_0 = const()[name = string("op_1590_axis_0"), val = int32(-1)]; tensor var_1590_cast_fp16_0, tensor var_1590_cast_fp16_1 = split(axis = var_1590_axis_0, split_sizes = var_1590_split_sizes_0, x = normed_31_cast_fp16)[name = string("op_1590_cast_fp16")]; tensor layers_4_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_4_ffn_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(821086656)))]; tensor normed_33_cast_fp16 = mul(x = var_1590_cast_fp16_0, y = layers_4_ffn_norm_weight_promoted_to_fp16)[name = string("normed_33_cast_fp16")]; tensor var_1596 = const()[name = string("op_1596"), val = tensor([0, 2, 1])]; tensor var_1599_axes_0 = const()[name = string("op_1599_axes_0"), val = tensor([2])]; tensor var_1597_cast_fp16 = transpose(perm = var_1596, x = normed_33_cast_fp16)[name = string("transpose_132")]; tensor var_1599_cast_fp16 = expand_dims(axes = var_1599_axes_0, x = var_1597_cast_fp16)[name = string("op_1599_cast_fp16")]; string input_71_pad_type_0 = const()[name = string("input_71_pad_type_0"), val = string("valid")]; tensor input_71_strides_0 = const()[name = string("input_71_strides_0"), val = tensor([1, 1])]; tensor input_71_pad_0 = const()[name = string("input_71_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_71_dilations_0 = const()[name = string("input_71_dilations_0"), val = tensor([1, 1])]; int32 input_71_groups_0 = const()[name = string("input_71_groups_0"), val = int32(1)]; tensor input_71 = conv(dilations = input_71_dilations_0, groups = input_71_groups_0, pad = input_71_pad_0, pad_type = input_71_pad_type_0, strides = input_71_strides_0, weight = layers_4_feed_forward_w1_weight, x = var_1599_cast_fp16)[name = string("input_71")]; string b_9_pad_type_0 = const()[name = string("b_9_pad_type_0"), val = string("valid")]; tensor b_9_strides_0 = const()[name = string("b_9_strides_0"), val = tensor([1, 1])]; tensor b_9_pad_0 = const()[name = string("b_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_9_dilations_0 = const()[name = string("b_9_dilations_0"), val = tensor([1, 1])]; int32 b_9_groups_0 = const()[name = string("b_9_groups_0"), val = int32(1)]; tensor b_9 = conv(dilations = b_9_dilations_0, groups = b_9_groups_0, pad = b_9_pad_0, pad_type = b_9_pad_type_0, strides = b_9_strides_0, weight = layers_4_feed_forward_w3_weight, x = var_1599_cast_fp16)[name = string("b_9")]; tensor var_1627 = silu(x = input_71)[name = string("op_1627")]; tensor input_73 = mul(x = var_1627, y = b_9)[name = string("input_73")]; string mlp_17_pad_type_0 = const()[name = string("mlp_17_pad_type_0"), val = string("valid")]; tensor mlp_17_strides_0 = const()[name = string("mlp_17_strides_0"), val = tensor([1, 1])]; tensor mlp_17_pad_0 = const()[name = string("mlp_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_17_dilations_0 = const()[name = string("mlp_17_dilations_0"), val = tensor([1, 1])]; int32 mlp_17_groups_0 = const()[name = string("mlp_17_groups_0"), val = int32(1)]; tensor mlp_17 = conv(dilations = mlp_17_dilations_0, groups = mlp_17_groups_0, pad = mlp_17_pad_0, pad_type = mlp_17_pad_type_0, strides = mlp_17_strides_0, weight = layers_4_feed_forward_w2_weight, x = input_73)[name = string("mlp_17")]; tensor var_1641_axes_0 = const()[name = string("op_1641_axes_0"), val = tensor([2])]; tensor var_1641 = squeeze(axes = var_1641_axes_0, x = mlp_17)[name = string("op_1641")]; tensor var_1645 = const()[name = string("op_1645"), val = tensor([0, 2, 1])]; tensor mlp_19 = transpose(perm = var_1645, x = var_1641)[name = string("transpose_131")]; tensor x_25_cast_fp16 = add(x = x_23_cast_fp16, y = mlp_19)[name = string("x_25_cast_fp16")]; fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1649_cast_fp16 = mul(x = x_25_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1649_cast_fp16")]; int32 var_1651 = const()[name = string("op_1651"), val = int32(-1)]; bool input_75_interleave_0 = const()[name = string("input_75_interleave_0"), val = bool(false)]; tensor input_75_cast_fp16 = concat(axis = var_1651, interleave = input_75_interleave_0, values = (x_25_cast_fp16, var_1649_cast_fp16))[name = string("input_75_cast_fp16")]; tensor normed_35_axes_0 = const()[name = string("normed_35_axes_0"), val = tensor([-1])]; fp16 var_1657_to_fp16 = const()[name = string("op_1657_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_35_cast_fp16 = layer_norm(axes = normed_35_axes_0, epsilon = var_1657_to_fp16, x = input_75_cast_fp16)[name = string("normed_35_cast_fp16")]; tensor var_1660_split_sizes_0 = const()[name = string("op_1660_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1660_axis_0 = const()[name = string("op_1660_axis_0"), val = int32(-1)]; tensor var_1660_cast_fp16_0, tensor var_1660_cast_fp16_1 = split(axis = var_1660_axis_0, split_sizes = var_1660_split_sizes_0, x = normed_35_cast_fp16)[name = string("op_1660_cast_fp16")]; tensor layers_5_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_5_operator_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(821088768)))]; tensor hidden_states_11_cast_fp16 = mul(x = var_1660_cast_fp16_0, y = layers_5_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_11_cast_fp16")]; tensor var_1666 = const()[name = string("op_1666"), val = tensor([0, 2, 1])]; tensor var_1669_axes_0 = const()[name = string("op_1669_axes_0"), val = tensor([2])]; tensor var_1667_cast_fp16 = transpose(perm = var_1666, x = hidden_states_11_cast_fp16)[name = string("transpose_130")]; tensor var_1669_cast_fp16 = expand_dims(axes = var_1669_axes_0, x = var_1667_cast_fp16)[name = string("op_1669_cast_fp16")]; string var_1685_pad_type_0 = const()[name = string("op_1685_pad_type_0"), val = string("valid")]; tensor var_1685_strides_0 = const()[name = string("op_1685_strides_0"), val = tensor([1, 1])]; tensor var_1685_pad_0 = const()[name = string("op_1685_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1685_dilations_0 = const()[name = string("op_1685_dilations_0"), val = tensor([1, 1])]; int32 var_1685_groups_0 = const()[name = string("op_1685_groups_0"), val = int32(1)]; tensor var_1685 = conv(dilations = var_1685_dilations_0, groups = var_1685_groups_0, pad = var_1685_pad_0, pad_type = var_1685_pad_type_0, strides = var_1685_strides_0, weight = layers_5_self_attn_q_proj_weight, x = var_1669_cast_fp16)[name = string("op_1685")]; tensor var_1690 = const()[name = string("op_1690"), val = tensor([1, 16, 64, 1])]; tensor var_1691 = reshape(shape = var_1690, x = var_1685)[name = string("op_1691")]; tensor var_1696 = const()[name = string("op_1696"), val = tensor([0, 1, 3, 2])]; string var_1713_pad_type_0 = const()[name = string("op_1713_pad_type_0"), val = string("valid")]; tensor var_1713_strides_0 = const()[name = string("op_1713_strides_0"), val = tensor([1, 1])]; tensor var_1713_pad_0 = const()[name = string("op_1713_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1713_dilations_0 = const()[name = string("op_1713_dilations_0"), val = tensor([1, 1])]; int32 var_1713_groups_0 = const()[name = string("op_1713_groups_0"), val = int32(1)]; tensor var_1713 = conv(dilations = var_1713_dilations_0, groups = var_1713_groups_0, pad = var_1713_pad_0, pad_type = var_1713_pad_type_0, strides = var_1713_strides_0, weight = layers_5_self_attn_k_proj_weight, x = var_1669_cast_fp16)[name = string("op_1713")]; tensor var_1718 = const()[name = string("op_1718"), val = tensor([1, 8, 64, 1])]; tensor var_1719 = reshape(shape = var_1718, x = var_1713)[name = string("op_1719")]; tensor var_1724 = const()[name = string("op_1724"), val = tensor([0, 1, 3, 2])]; string var_1741_pad_type_0 = const()[name = string("op_1741_pad_type_0"), val = string("valid")]; tensor var_1741_strides_0 = const()[name = string("op_1741_strides_0"), val = tensor([1, 1])]; tensor var_1741_pad_0 = const()[name = string("op_1741_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1741_dilations_0 = const()[name = string("op_1741_dilations_0"), val = tensor([1, 1])]; int32 var_1741_groups_0 = const()[name = string("op_1741_groups_0"), val = int32(1)]; tensor var_1741 = conv(dilations = var_1741_dilations_0, groups = var_1741_groups_0, pad = var_1741_pad_0, pad_type = var_1741_pad_type_0, strides = var_1741_strides_0, weight = layers_5_self_attn_v_proj_weight, x = var_1669_cast_fp16)[name = string("op_1741")]; tensor var_1746 = const()[name = string("op_1746"), val = tensor([1, 8, 64, 1])]; tensor var_1747 = reshape(shape = var_1746, x = var_1741)[name = string("op_1747")]; tensor var_1752 = const()[name = string("op_1752"), val = tensor([0, 1, 3, 2])]; fp16 const_15_promoted = const()[name = string("const_15_promoted"), val = fp16(-0x1p+0)]; tensor var_1697 = transpose(perm = var_1696, x = var_1691)[name = string("transpose_129")]; tensor var_1759 = mul(x = var_1697, y = const_15_promoted)[name = string("op_1759")]; int32 var_1761 = const()[name = string("op_1761"), val = int32(-1)]; bool input_79_interleave_0 = const()[name = string("input_79_interleave_0"), val = bool(false)]; tensor input_79 = concat(axis = var_1761, interleave = input_79_interleave_0, values = (var_1697, var_1759))[name = string("input_79")]; tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; fp16 var_1767_to_fp16 = const()[name = string("op_1767_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_1767_to_fp16, x = input_79)[name = string("normed_37_cast_fp16")]; tensor var_1770_split_sizes_0 = const()[name = string("op_1770_split_sizes_0"), val = tensor([64, 64])]; int32 var_1770_axis_0 = const()[name = string("op_1770_axis_0"), val = int32(-1)]; tensor var_1770_0, tensor var_1770_1 = split(axis = var_1770_axis_0, split_sizes = var_1770_split_sizes_0, x = normed_37_cast_fp16)[name = string("op_1770")]; tensor q_5 = mul(x = var_1770_0, y = layers_5_self_attn_q_layernorm_weight)[name = string("q_5")]; fp16 const_16_promoted = const()[name = string("const_16_promoted"), val = fp16(-0x1p+0)]; tensor var_1725 = transpose(perm = var_1724, x = var_1719)[name = string("transpose_128")]; tensor var_1773 = mul(x = var_1725, y = const_16_promoted)[name = string("op_1773")]; int32 var_1775 = const()[name = string("op_1775"), val = int32(-1)]; bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)]; tensor input_81 = concat(axis = var_1775, interleave = input_81_interleave_0, values = (var_1725, var_1773))[name = string("input_81")]; tensor normed_39_axes_0 = const()[name = string("normed_39_axes_0"), val = tensor([-1])]; fp16 var_1781_to_fp16 = const()[name = string("op_1781_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_39_cast_fp16 = layer_norm(axes = normed_39_axes_0, epsilon = var_1781_to_fp16, x = input_81)[name = string("normed_39_cast_fp16")]; tensor var_1784_split_sizes_0 = const()[name = string("op_1784_split_sizes_0"), val = tensor([64, 64])]; int32 var_1784_axis_0 = const()[name = string("op_1784_axis_0"), val = int32(-1)]; tensor var_1784_0, tensor var_1784_1 = split(axis = var_1784_axis_0, split_sizes = var_1784_split_sizes_0, x = normed_39_cast_fp16)[name = string("op_1784")]; tensor k_5 = mul(x = var_1784_0, y = layers_5_self_attn_k_layernorm_weight)[name = string("k_5")]; tensor var_1787 = mul(x = q_5, y = cos)[name = string("op_1787")]; tensor var_1788_split_sizes_0 = const()[name = string("op_1788_split_sizes_0"), val = tensor([32, 32])]; int32 var_1788_axis_0 = const()[name = string("op_1788_axis_0"), val = int32(-1)]; tensor var_1788_0, tensor var_1788_1 = split(axis = var_1788_axis_0, split_sizes = var_1788_split_sizes_0, x = q_5)[name = string("op_1788")]; fp16 const_17_promoted = const()[name = string("const_17_promoted"), val = fp16(-0x1p+0)]; tensor var_1790 = mul(x = var_1788_1, y = const_17_promoted)[name = string("op_1790")]; int32 var_1792 = const()[name = string("op_1792"), val = int32(-1)]; bool var_1793_interleave_0 = const()[name = string("op_1793_interleave_0"), val = bool(false)]; tensor var_1793 = concat(axis = var_1792, interleave = var_1793_interleave_0, values = (var_1790, var_1788_0))[name = string("op_1793")]; tensor var_1794 = mul(x = var_1793, y = sin)[name = string("op_1794")]; tensor q_7 = add(x = var_1787, y = var_1794)[name = string("q_7")]; tensor var_1797 = mul(x = k_5, y = cos)[name = string("op_1797")]; tensor var_1798_split_sizes_0 = const()[name = string("op_1798_split_sizes_0"), val = tensor([32, 32])]; int32 var_1798_axis_0 = const()[name = string("op_1798_axis_0"), val = int32(-1)]; tensor var_1798_0, tensor var_1798_1 = split(axis = var_1798_axis_0, split_sizes = var_1798_split_sizes_0, x = k_5)[name = string("op_1798")]; fp16 const_18_promoted = const()[name = string("const_18_promoted"), val = fp16(-0x1p+0)]; tensor var_1800 = mul(x = var_1798_1, y = const_18_promoted)[name = string("op_1800")]; int32 var_1802 = const()[name = string("op_1802"), val = int32(-1)]; bool var_1803_interleave_0 = const()[name = string("op_1803_interleave_0"), val = bool(false)]; tensor var_1803 = concat(axis = var_1802, interleave = var_1803_interleave_0, values = (var_1800, var_1798_0))[name = string("op_1803")]; tensor var_1804 = mul(x = var_1803, y = sin)[name = string("op_1804")]; tensor k_7 = add(x = var_1797, y = var_1804)[name = string("k_7")]; tensor var_1809_begin_0 = const()[name = string("op_1809_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_1809_end_0 = const()[name = string("op_1809_end_0"), val = tensor([2, 8, 2048, 64])]; tensor var_1809_end_mask_0 = const()[name = string("op_1809_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1809_squeeze_mask_0 = const()[name = string("op_1809_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_1809_cast_fp16 = slice_by_index(begin = var_1809_begin_0, end = var_1809_end_0, end_mask = var_1809_end_mask_0, squeeze_mask = var_1809_squeeze_mask_0, x = coreml_update_state_13)[name = string("op_1809_cast_fp16")]; tensor K_cache_3_axes_0 = const()[name = string("K_cache_3_axes_0"), val = tensor([0])]; tensor K_cache_3_cast_fp16 = expand_dims(axes = K_cache_3_axes_0, x = var_1809_cast_fp16)[name = string("K_cache_3_cast_fp16")]; tensor var_1814_begin_0 = const()[name = string("op_1814_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_1814_end_0 = const()[name = string("op_1814_end_0"), val = tensor([8, 8, 2048, 64])]; tensor var_1814_end_mask_0 = const()[name = string("op_1814_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1814_squeeze_mask_0 = const()[name = string("op_1814_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_1814_cast_fp16 = slice_by_index(begin = var_1814_begin_0, end = var_1814_end_0, end_mask = var_1814_end_mask_0, squeeze_mask = var_1814_squeeze_mask_0, x = coreml_update_state_13)[name = string("op_1814_cast_fp16")]; tensor V_cache_3_axes_0 = const()[name = string("V_cache_3_axes_0"), val = tensor([0])]; tensor V_cache_3_cast_fp16 = expand_dims(axes = V_cache_3_axes_0, x = var_1814_cast_fp16)[name = string("V_cache_3_cast_fp16")]; tensor k_b_3_reps_0 = const()[name = string("k_b_3_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_b_3 = tile(reps = k_b_3_reps_0, x = k_7)[name = string("k_b_3")]; tensor v_b_3_reps_0 = const()[name = string("v_b_3_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor var_1753 = transpose(perm = var_1752, x = var_1747)[name = string("transpose_127")]; tensor v_b_3 = tile(reps = v_b_3_reps_0, x = var_1753)[name = string("v_b_3")]; tensor var_1822_cast_fp16 = mul(x = K_cache_3_cast_fp16, y = var_1132_cast_fp16)[name = string("op_1822_cast_fp16")]; tensor var_1823_cast_fp16 = mul(x = k_b_3, y = update_mask)[name = string("op_1823_cast_fp16")]; tensor K_new_3_cast_fp16 = add(x = var_1822_cast_fp16, y = var_1823_cast_fp16)[name = string("K_new_3_cast_fp16")]; tensor var_1829_cast_fp16 = mul(x = V_cache_3_cast_fp16, y = var_1132_cast_fp16)[name = string("op_1829_cast_fp16")]; tensor var_1830_cast_fp16 = mul(x = v_b_3, y = update_mask)[name = string("op_1830_cast_fp16")]; tensor V_new_3_cast_fp16 = add(x = var_1829_cast_fp16, y = var_1830_cast_fp16)[name = string("V_new_3_cast_fp16")]; tensor var_1834_axes_0 = const()[name = string("op_1834_axes_0"), val = tensor([0])]; tensor var_1834_cast_fp16 = squeeze(axes = var_1834_axes_0, x = K_new_3_cast_fp16)[name = string("op_1834_cast_fp16")]; tensor concat_8 = const()[name = string("concat_8"), val = tensor([1, 0, 0, 0])]; tensor concat_9 = const()[name = string("concat_9"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_8, begin_mask = kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_9, end_mask = kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_3_stride_0, update = var_1834_cast_fp16, x = coreml_update_state_13)[name = string("kv_cache_0_internal_tensor_assign_3_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_3_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_14_write_state")]; tensor coreml_update_state_14 = read_state(input = kv_cache_0)[name = string("coreml_update_state_14")]; tensor var_1841_axes_0 = const()[name = string("op_1841_axes_0"), val = tensor([0])]; tensor var_1841_cast_fp16 = squeeze(axes = var_1841_axes_0, x = V_new_3_cast_fp16)[name = string("op_1841_cast_fp16")]; tensor concat_10 = const()[name = string("concat_10"), val = tensor([7, 0, 0, 0])]; tensor concat_11 = const()[name = string("concat_11"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_10, begin_mask = kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_11, end_mask = kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_4_stride_0, update = var_1841_cast_fp16, x = coreml_update_state_14)[name = string("kv_cache_0_internal_tensor_assign_4_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_4_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_15_write_state")]; tensor coreml_update_state_15 = read_state(input = kv_cache_0)[name = string("coreml_update_state_15")]; tensor transpose_4_perm_0 = const()[name = string("transpose_4_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_2_reps_0 = const()[name = string("tile_2_reps_0"), val = tensor([2, 1, 1, 1])]; tensor transpose_4_cast_fp16 = transpose(perm = transpose_4_perm_0, x = K_new_3_cast_fp16)[name = string("transpose_126")]; tensor tile_2_cast_fp16 = tile(reps = tile_2_reps_0, x = transpose_4_cast_fp16)[name = string("tile_2_cast_fp16")]; tensor concat_12 = const()[name = string("concat_12"), val = tensor([2, 8, 1, 2048, 64])]; tensor reshape_4_cast_fp16 = reshape(shape = concat_12, x = tile_2_cast_fp16)[name = string("reshape_4_cast_fp16")]; tensor transpose_5_perm_0 = const()[name = string("transpose_5_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_13 = const()[name = string("concat_13"), val = tensor([-1, 1, 2048, 64])]; tensor transpose_5_cast_fp16 = transpose(perm = transpose_5_perm_0, x = reshape_4_cast_fp16)[name = string("transpose_125")]; tensor reshape_5_cast_fp16 = reshape(shape = concat_13, x = transpose_5_cast_fp16)[name = string("reshape_5_cast_fp16")]; tensor transpose_25_perm_0 = const()[name = string("transpose_25_perm_0"), val = tensor([1, 0, 3, 2])]; tensor transpose_6_perm_0 = const()[name = string("transpose_6_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_3_reps_0 = const()[name = string("tile_3_reps_0"), val = tensor([2, 1, 1, 1])]; tensor transpose_6_cast_fp16 = transpose(perm = transpose_6_perm_0, x = V_new_3_cast_fp16)[name = string("transpose_123")]; tensor tile_3_cast_fp16 = tile(reps = tile_3_reps_0, x = transpose_6_cast_fp16)[name = string("tile_3_cast_fp16")]; tensor concat_14 = const()[name = string("concat_14"), val = tensor([2, 8, 1, 2048, 64])]; tensor reshape_6_cast_fp16 = reshape(shape = concat_14, x = tile_3_cast_fp16)[name = string("reshape_6_cast_fp16")]; tensor transpose_7_perm_0 = const()[name = string("transpose_7_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_15 = const()[name = string("concat_15"), val = tensor([-1, 1, 2048, 64])]; tensor transpose_7_cast_fp16 = transpose(perm = transpose_7_perm_0, x = reshape_6_cast_fp16)[name = string("transpose_122")]; tensor reshape_7_cast_fp16 = reshape(shape = concat_15, x = transpose_7_cast_fp16)[name = string("reshape_7_cast_fp16")]; tensor V_e_3_perm_0 = const()[name = string("V_e_3_perm_0"), val = tensor([1, 0, 2, 3])]; bool var_1868_transpose_x_0 = const()[name = string("op_1868_transpose_x_0"), val = bool(false)]; bool var_1868_transpose_y_0 = const()[name = string("op_1868_transpose_y_0"), val = bool(false)]; tensor transpose_25_cast_fp16 = transpose(perm = transpose_25_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_124")]; tensor var_1868_cast_fp16 = matmul(transpose_x = var_1868_transpose_x_0, transpose_y = var_1868_transpose_y_0, x = q_7, y = transpose_25_cast_fp16)[name = string("op_1868_cast_fp16")]; fp16 var_1869_to_fp16 = const()[name = string("op_1869_to_fp16"), val = fp16(0x1p-3)]; tensor attn_7_cast_fp16 = mul(x = var_1868_cast_fp16, y = var_1869_to_fp16)[name = string("attn_7_cast_fp16")]; tensor attn_9_cast_fp16 = add(x = attn_7_cast_fp16, y = causal_mask)[name = string("attn_9_cast_fp16")]; int32 var_1878 = const()[name = string("op_1878"), val = int32(-1)]; tensor var_1880_cast_fp16 = softmax(axis = var_1878, x = attn_9_cast_fp16)[name = string("op_1880_cast_fp16")]; bool var_1896_transpose_x_0 = const()[name = string("op_1896_transpose_x_0"), val = bool(false)]; bool var_1896_transpose_y_0 = const()[name = string("op_1896_transpose_y_0"), val = bool(false)]; tensor V_e_3_cast_fp16 = transpose(perm = V_e_3_perm_0, x = reshape_7_cast_fp16)[name = string("transpose_121")]; tensor var_1896_cast_fp16 = matmul(transpose_x = var_1896_transpose_x_0, transpose_y = var_1896_transpose_y_0, x = var_1880_cast_fp16, y = V_e_3_cast_fp16)[name = string("op_1896_cast_fp16")]; tensor var_1906 = const()[name = string("op_1906"), val = tensor([0, 2, 1, 3])]; tensor var_1913 = const()[name = string("op_1913"), val = tensor([1, 1, -1])]; tensor var_1907 = transpose(perm = var_1906, x = var_1896_cast_fp16)[name = string("transpose_120")]; tensor out_7 = reshape(shape = var_1913, x = var_1907)[name = string("out_7")]; tensor var_1918 = const()[name = string("op_1918"), val = tensor([0, 2, 1])]; tensor squeeze_1 = const()[name = string("squeeze_1"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(821090880)))]; string var_1934_pad_type_0 = const()[name = string("op_1934_pad_type_0"), val = string("valid")]; int32 var_1934_groups_0 = const()[name = string("op_1934_groups_0"), val = int32(1)]; tensor var_1934_strides_0 = const()[name = string("op_1934_strides_0"), val = tensor([1])]; tensor var_1934_pad_0 = const()[name = string("op_1934_pad_0"), val = tensor([0, 0])]; tensor var_1934_dilations_0 = const()[name = string("op_1934_dilations_0"), val = tensor([1])]; tensor var_1919 = transpose(perm = var_1918, x = out_7)[name = string("transpose_119")]; tensor var_1934 = conv(dilations = var_1934_dilations_0, groups = var_1934_groups_0, pad = var_1934_pad_0, pad_type = var_1934_pad_type_0, strides = var_1934_strides_0, weight = squeeze_1, x = var_1919)[name = string("op_1934")]; tensor var_1938 = const()[name = string("op_1938"), val = tensor([0, 2, 1])]; tensor op_out_11 = transpose(perm = var_1938, x = var_1934)[name = string("transpose_118")]; tensor x_31_cast_fp16 = add(x = x_25_cast_fp16, y = op_out_11)[name = string("x_31_cast_fp16")]; fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1942_cast_fp16 = mul(x = x_31_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1942_cast_fp16")]; int32 var_1944 = const()[name = string("op_1944"), val = int32(-1)]; bool input_85_interleave_0 = const()[name = string("input_85_interleave_0"), val = bool(false)]; tensor input_85_cast_fp16 = concat(axis = var_1944, interleave = input_85_interleave_0, values = (x_31_cast_fp16, var_1942_cast_fp16))[name = string("input_85_cast_fp16")]; tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; fp16 var_1950_to_fp16 = const()[name = string("op_1950_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_1950_to_fp16, x = input_85_cast_fp16)[name = string("normed_41_cast_fp16")]; tensor var_1953_split_sizes_0 = const()[name = string("op_1953_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_1953_axis_0 = const()[name = string("op_1953_axis_0"), val = int32(-1)]; tensor var_1953_cast_fp16_0, tensor var_1953_cast_fp16_1 = split(axis = var_1953_axis_0, split_sizes = var_1953_split_sizes_0, x = normed_41_cast_fp16)[name = string("op_1953_cast_fp16")]; tensor layers_5_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_5_ffn_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(823188096)))]; tensor normed_43_cast_fp16 = mul(x = var_1953_cast_fp16_0, y = layers_5_ffn_norm_weight_promoted_to_fp16)[name = string("normed_43_cast_fp16")]; tensor var_1959 = const()[name = string("op_1959"), val = tensor([0, 2, 1])]; tensor var_1962_axes_0 = const()[name = string("op_1962_axes_0"), val = tensor([2])]; tensor var_1960_cast_fp16 = transpose(perm = var_1959, x = normed_43_cast_fp16)[name = string("transpose_117")]; tensor var_1962_cast_fp16 = expand_dims(axes = var_1962_axes_0, x = var_1960_cast_fp16)[name = string("op_1962_cast_fp16")]; string input_89_pad_type_0 = const()[name = string("input_89_pad_type_0"), val = string("valid")]; tensor input_89_strides_0 = const()[name = string("input_89_strides_0"), val = tensor([1, 1])]; tensor input_89_pad_0 = const()[name = string("input_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_89_dilations_0 = const()[name = string("input_89_dilations_0"), val = tensor([1, 1])]; int32 input_89_groups_0 = const()[name = string("input_89_groups_0"), val = int32(1)]; tensor input_89 = conv(dilations = input_89_dilations_0, groups = input_89_groups_0, pad = input_89_pad_0, pad_type = input_89_pad_type_0, strides = input_89_strides_0, weight = layers_5_feed_forward_w1_weight, x = var_1962_cast_fp16)[name = string("input_89")]; string b_11_pad_type_0 = const()[name = string("b_11_pad_type_0"), val = string("valid")]; tensor b_11_strides_0 = const()[name = string("b_11_strides_0"), val = tensor([1, 1])]; tensor b_11_pad_0 = const()[name = string("b_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_11_dilations_0 = const()[name = string("b_11_dilations_0"), val = tensor([1, 1])]; int32 b_11_groups_0 = const()[name = string("b_11_groups_0"), val = int32(1)]; tensor b_11 = conv(dilations = b_11_dilations_0, groups = b_11_groups_0, pad = b_11_pad_0, pad_type = b_11_pad_type_0, strides = b_11_strides_0, weight = layers_5_feed_forward_w3_weight, x = var_1962_cast_fp16)[name = string("b_11")]; tensor var_1990 = silu(x = input_89)[name = string("op_1990")]; tensor input_91 = mul(x = var_1990, y = b_11)[name = string("input_91")]; string mlp_21_pad_type_0 = const()[name = string("mlp_21_pad_type_0"), val = string("valid")]; tensor mlp_21_strides_0 = const()[name = string("mlp_21_strides_0"), val = tensor([1, 1])]; tensor mlp_21_pad_0 = const()[name = string("mlp_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_21_dilations_0 = const()[name = string("mlp_21_dilations_0"), val = tensor([1, 1])]; int32 mlp_21_groups_0 = const()[name = string("mlp_21_groups_0"), val = int32(1)]; tensor mlp_21 = conv(dilations = mlp_21_dilations_0, groups = mlp_21_groups_0, pad = mlp_21_pad_0, pad_type = mlp_21_pad_type_0, strides = mlp_21_strides_0, weight = layers_5_feed_forward_w2_weight, x = input_91)[name = string("mlp_21")]; tensor var_2004_axes_0 = const()[name = string("op_2004_axes_0"), val = tensor([2])]; tensor var_2004 = squeeze(axes = var_2004_axes_0, x = mlp_21)[name = string("op_2004")]; tensor var_2008 = const()[name = string("op_2008"), val = tensor([0, 2, 1])]; tensor mlp_23 = transpose(perm = var_2008, x = var_2004)[name = string("transpose_116")]; tensor x_33_cast_fp16 = add(x = x_31_cast_fp16, y = mlp_23)[name = string("x_33_cast_fp16")]; fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2012_cast_fp16 = mul(x = x_33_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_2012_cast_fp16")]; int32 var_2014 = const()[name = string("op_2014"), val = int32(-1)]; bool input_93_interleave_0 = const()[name = string("input_93_interleave_0"), val = bool(false)]; tensor input_93_cast_fp16 = concat(axis = var_2014, interleave = input_93_interleave_0, values = (x_33_cast_fp16, var_2012_cast_fp16))[name = string("input_93_cast_fp16")]; tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; fp16 var_2020_to_fp16 = const()[name = string("op_2020_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_2020_to_fp16, x = input_93_cast_fp16)[name = string("normed_45_cast_fp16")]; tensor var_2023_split_sizes_0 = const()[name = string("op_2023_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2023_axis_0 = const()[name = string("op_2023_axis_0"), val = int32(-1)]; tensor var_2023_cast_fp16_0, tensor var_2023_cast_fp16_1 = split(axis = var_2023_axis_0, split_sizes = var_2023_split_sizes_0, x = normed_45_cast_fp16)[name = string("op_2023_cast_fp16")]; tensor layers_6_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_6_operator_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(823190208)))]; tensor hidden_states_13_cast_fp16 = mul(x = var_2023_cast_fp16_0, y = layers_6_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_13_cast_fp16")]; tensor var_2029 = const()[name = string("op_2029"), val = tensor([0, 2, 1])]; tensor var_2032_axes_0 = const()[name = string("op_2032_axes_0"), val = tensor([2])]; tensor var_2030_cast_fp16 = transpose(perm = var_2029, x = hidden_states_13_cast_fp16)[name = string("transpose_115")]; tensor var_2032_cast_fp16 = expand_dims(axes = var_2032_axes_0, x = var_2030_cast_fp16)[name = string("op_2032_cast_fp16")]; string BCx_9_pad_type_0 = const()[name = string("BCx_9_pad_type_0"), val = string("valid")]; tensor BCx_9_strides_0 = const()[name = string("BCx_9_strides_0"), val = tensor([1, 1])]; tensor BCx_9_pad_0 = const()[name = string("BCx_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor BCx_9_dilations_0 = const()[name = string("BCx_9_dilations_0"), val = tensor([1, 1])]; int32 BCx_9_groups_0 = const()[name = string("BCx_9_groups_0"), val = int32(1)]; tensor BCx_9 = conv(dilations = BCx_9_dilations_0, groups = BCx_9_groups_0, pad = BCx_9_pad_0, pad_type = BCx_9_pad_type_0, strides = BCx_9_strides_0, weight = layers_6_conv_in_proj_weight, x = var_2032_cast_fp16)[name = string("BCx_9")]; tensor var_2049_split_sizes_0 = const()[name = string("op_2049_split_sizes_0"), val = tensor([1024, 1024, 1024])]; int32 var_2049_axis_0 = const()[name = string("op_2049_axis_0"), val = int32(1)]; tensor var_2049_0, tensor var_2049_1, tensor var_2049_2 = split(axis = var_2049_axis_0, split_sizes = var_2049_split_sizes_0, x = BCx_9)[name = string("op_2049")]; tensor Bx_9 = mul(x = var_2049_0, y = var_2049_2)[name = string("Bx_9")]; tensor var_2055_begin_0 = const()[name = string("op_2055_begin_0"), val = tensor([4, 0, 0])]; tensor var_2055_end_0 = const()[name = string("op_2055_end_0"), val = tensor([5, 1024, 3])]; tensor var_2055_end_mask_0 = const()[name = string("op_2055_end_mask_0"), val = tensor([false, true, true])]; tensor var_2055_squeeze_mask_0 = const()[name = string("op_2055_squeeze_mask_0"), val = tensor([true, false, false])]; tensor var_2055_cast_fp16 = slice_by_index(begin = var_2055_begin_0, end = var_2055_end_0, end_mask = var_2055_end_mask_0, squeeze_mask = var_2055_squeeze_mask_0, x = conv_state_in)[name = string("op_2055_cast_fp16")]; tensor var_2057_axes_0 = const()[name = string("op_2057_axes_0"), val = tensor([0])]; tensor var_2057_cast_fp16 = expand_dims(axes = var_2057_axes_0, x = var_2055_cast_fp16)[name = string("op_2057_cast_fp16")]; tensor slot_9_axes_0 = const()[name = string("slot_9_axes_0"), val = tensor([2])]; tensor slot_9_cast_fp16 = expand_dims(axes = slot_9_axes_0, x = var_2057_cast_fp16)[name = string("slot_9_cast_fp16")]; tensor live_tail_9_begin_0 = const()[name = string("live_tail_9_begin_0"), val = tensor([0, 0, 0, 1])]; tensor live_tail_9_end_0 = const()[name = string("live_tail_9_end_0"), val = tensor([1, 1024, 1, 1])]; tensor live_tail_9_end_mask_0 = const()[name = string("live_tail_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor live_tail_9_cast_fp16 = slice_by_index(begin = live_tail_9_begin_0, end = live_tail_9_end_0, end_mask = live_tail_9_end_mask_0, x = slot_9_cast_fp16)[name = string("live_tail_9_cast_fp16")]; int32 var_2066 = const()[name = string("op_2066"), val = int32(-1)]; bool new_state_9_interleave_0 = const()[name = string("new_state_9_interleave_0"), val = bool(false)]; tensor new_state_9_cast_fp16 = concat(axis = var_2066, interleave = new_state_9_interleave_0, values = (live_tail_9_cast_fp16, Bx_9))[name = string("new_state_9_cast_fp16")]; tensor var_2069_axes_0 = const()[name = string("op_2069_axes_0"), val = tensor([0])]; tensor var_2069_cast_fp16 = squeeze(axes = var_2069_axes_0, x = new_state_9_cast_fp16)[name = string("op_2069_cast_fp16")]; tensor var_2071_axes_0 = const()[name = string("op_2071_axes_0"), val = tensor([1])]; tensor var_2071_cast_fp16 = squeeze(axes = var_2071_axes_0, x = var_2069_cast_fp16)[name = string("op_2071_cast_fp16")]; string conv_out_9_pad_type_0 = const()[name = string("conv_out_9_pad_type_0"), val = string("valid")]; int32 conv_out_9_groups_0 = const()[name = string("conv_out_9_groups_0"), val = int32(1024)]; tensor conv_out_9_strides_0 = const()[name = string("conv_out_9_strides_0"), val = tensor([1, 1])]; tensor conv_out_9_pad_0 = const()[name = string("conv_out_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor conv_out_9_dilations_0 = const()[name = string("conv_out_9_dilations_0"), val = tensor([1, 1])]; tensor layers_6_conv_conv_weight_promoted_to_fp16 = const()[name = string("layers_6_conv_conv_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(823192320)))]; tensor conv_out_9_cast_fp16 = conv(dilations = conv_out_9_dilations_0, groups = conv_out_9_groups_0, pad = conv_out_9_pad_0, pad_type = conv_out_9_pad_type_0, strides = conv_out_9_strides_0, weight = layers_6_conv_conv_weight_promoted_to_fp16, x = new_state_9_cast_fp16)[name = string("conv_out_9_cast_fp16")]; tensor input_97_cast_fp16 = mul(x = var_2049_1, y = conv_out_9_cast_fp16)[name = string("input_97_cast_fp16")]; string y_9_pad_type_0 = const()[name = string("y_9_pad_type_0"), val = string("valid")]; tensor y_9_strides_0 = const()[name = string("y_9_strides_0"), val = tensor([1, 1])]; tensor y_9_pad_0 = const()[name = string("y_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor y_9_dilations_0 = const()[name = string("y_9_dilations_0"), val = tensor([1, 1])]; int32 y_9_groups_0 = const()[name = string("y_9_groups_0"), val = int32(1)]; tensor layers_6_conv_out_proj_weight_promoted_to_fp16 = const()[name = string("layers_6_conv_out_proj_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(823198528)))]; tensor y_9_cast_fp16 = conv(dilations = y_9_dilations_0, groups = y_9_groups_0, pad = y_9_pad_0, pad_type = y_9_pad_type_0, strides = y_9_strides_0, weight = layers_6_conv_out_proj_weight_promoted_to_fp16, x = input_97_cast_fp16)[name = string("y_9_cast_fp16")]; tensor var_2097_axes_0 = const()[name = string("op_2097_axes_0"), val = tensor([2])]; tensor var_2097_cast_fp16 = squeeze(axes = var_2097_axes_0, x = y_9_cast_fp16)[name = string("op_2097_cast_fp16")]; tensor var_2101 = const()[name = string("op_2101"), val = tensor([0, 2, 1])]; tensor op_out_13_cast_fp16 = transpose(perm = var_2101, x = var_2097_cast_fp16)[name = string("transpose_114")]; tensor x_35_cast_fp16 = add(x = x_33_cast_fp16, y = op_out_13_cast_fp16)[name = string("x_35_cast_fp16")]; fp16 const_21_promoted_to_fp16 = const()[name = string("const_21_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2105_cast_fp16 = mul(x = x_35_cast_fp16, y = const_21_promoted_to_fp16)[name = string("op_2105_cast_fp16")]; int32 var_2107 = const()[name = string("op_2107"), val = int32(-1)]; bool input_99_interleave_0 = const()[name = string("input_99_interleave_0"), val = bool(false)]; tensor input_99_cast_fp16 = concat(axis = var_2107, interleave = input_99_interleave_0, values = (x_35_cast_fp16, var_2105_cast_fp16))[name = string("input_99_cast_fp16")]; tensor normed_47_axes_0 = const()[name = string("normed_47_axes_0"), val = tensor([-1])]; fp16 var_2113_to_fp16 = const()[name = string("op_2113_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_47_cast_fp16 = layer_norm(axes = normed_47_axes_0, epsilon = var_2113_to_fp16, x = input_99_cast_fp16)[name = string("normed_47_cast_fp16")]; tensor var_2116_split_sizes_0 = const()[name = string("op_2116_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2116_axis_0 = const()[name = string("op_2116_axis_0"), val = int32(-1)]; tensor var_2116_cast_fp16_0, tensor var_2116_cast_fp16_1 = split(axis = var_2116_axis_0, split_sizes = var_2116_split_sizes_0, x = normed_47_cast_fp16)[name = string("op_2116_cast_fp16")]; tensor layers_6_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_6_ffn_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(825295744)))]; tensor normed_49_cast_fp16 = mul(x = var_2116_cast_fp16_0, y = layers_6_ffn_norm_weight_promoted_to_fp16)[name = string("normed_49_cast_fp16")]; tensor var_2122 = const()[name = string("op_2122"), val = tensor([0, 2, 1])]; tensor var_2125_axes_0 = const()[name = string("op_2125_axes_0"), val = tensor([2])]; tensor var_2123_cast_fp16 = transpose(perm = var_2122, x = normed_49_cast_fp16)[name = string("transpose_113")]; tensor var_2125_cast_fp16 = expand_dims(axes = var_2125_axes_0, x = var_2123_cast_fp16)[name = string("op_2125_cast_fp16")]; string input_103_pad_type_0 = const()[name = string("input_103_pad_type_0"), val = string("valid")]; tensor input_103_strides_0 = const()[name = string("input_103_strides_0"), val = tensor([1, 1])]; tensor input_103_pad_0 = const()[name = string("input_103_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_103_dilations_0 = const()[name = string("input_103_dilations_0"), val = tensor([1, 1])]; int32 input_103_groups_0 = const()[name = string("input_103_groups_0"), val = int32(1)]; tensor input_103 = conv(dilations = input_103_dilations_0, groups = input_103_groups_0, pad = input_103_pad_0, pad_type = input_103_pad_type_0, strides = input_103_strides_0, weight = layers_6_feed_forward_w1_weight, x = var_2125_cast_fp16)[name = string("input_103")]; string b_13_pad_type_0 = const()[name = string("b_13_pad_type_0"), val = string("valid")]; tensor b_13_strides_0 = const()[name = string("b_13_strides_0"), val = tensor([1, 1])]; tensor b_13_pad_0 = const()[name = string("b_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_13_dilations_0 = const()[name = string("b_13_dilations_0"), val = tensor([1, 1])]; int32 b_13_groups_0 = const()[name = string("b_13_groups_0"), val = int32(1)]; tensor b_13 = conv(dilations = b_13_dilations_0, groups = b_13_groups_0, pad = b_13_pad_0, pad_type = b_13_pad_type_0, strides = b_13_strides_0, weight = layers_6_feed_forward_w3_weight, x = var_2125_cast_fp16)[name = string("b_13")]; tensor var_2153 = silu(x = input_103)[name = string("op_2153")]; tensor input_105 = mul(x = var_2153, y = b_13)[name = string("input_105")]; string mlp_25_pad_type_0 = const()[name = string("mlp_25_pad_type_0"), val = string("valid")]; tensor mlp_25_strides_0 = const()[name = string("mlp_25_strides_0"), val = tensor([1, 1])]; tensor mlp_25_pad_0 = const()[name = string("mlp_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_25_dilations_0 = const()[name = string("mlp_25_dilations_0"), val = tensor([1, 1])]; int32 mlp_25_groups_0 = const()[name = string("mlp_25_groups_0"), val = int32(1)]; tensor mlp_25 = conv(dilations = mlp_25_dilations_0, groups = mlp_25_groups_0, pad = mlp_25_pad_0, pad_type = mlp_25_pad_type_0, strides = mlp_25_strides_0, weight = layers_6_feed_forward_w2_weight, x = input_105)[name = string("mlp_25")]; tensor var_2167_axes_0 = const()[name = string("op_2167_axes_0"), val = tensor([2])]; tensor var_2167 = squeeze(axes = var_2167_axes_0, x = mlp_25)[name = string("op_2167")]; tensor var_2171 = const()[name = string("op_2171"), val = tensor([0, 2, 1])]; tensor mlp_27 = transpose(perm = var_2171, x = var_2167)[name = string("transpose_112")]; tensor x_37_cast_fp16 = add(x = x_35_cast_fp16, y = mlp_27)[name = string("x_37_cast_fp16")]; fp16 const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2175_cast_fp16 = mul(x = x_37_cast_fp16, y = const_22_promoted_to_fp16)[name = string("op_2175_cast_fp16")]; int32 var_2177 = const()[name = string("op_2177"), val = int32(-1)]; bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; tensor input_107_cast_fp16 = concat(axis = var_2177, interleave = input_107_interleave_0, values = (x_37_cast_fp16, var_2175_cast_fp16))[name = string("input_107_cast_fp16")]; tensor normed_51_axes_0 = const()[name = string("normed_51_axes_0"), val = tensor([-1])]; fp16 var_2183_to_fp16 = const()[name = string("op_2183_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_51_cast_fp16 = layer_norm(axes = normed_51_axes_0, epsilon = var_2183_to_fp16, x = input_107_cast_fp16)[name = string("normed_51_cast_fp16")]; tensor var_2186_split_sizes_0 = const()[name = string("op_2186_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2186_axis_0 = const()[name = string("op_2186_axis_0"), val = int32(-1)]; tensor var_2186_cast_fp16_0, tensor var_2186_cast_fp16_1 = split(axis = var_2186_axis_0, split_sizes = var_2186_split_sizes_0, x = normed_51_cast_fp16)[name = string("op_2186_cast_fp16")]; tensor layers_7_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_7_operator_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(825297856)))]; tensor hidden_states_15_cast_fp16 = mul(x = var_2186_cast_fp16_0, y = layers_7_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_15_cast_fp16")]; tensor var_2192 = const()[name = string("op_2192"), val = tensor([0, 2, 1])]; tensor var_2195_axes_0 = const()[name = string("op_2195_axes_0"), val = tensor([2])]; tensor var_2193_cast_fp16 = transpose(perm = var_2192, x = hidden_states_15_cast_fp16)[name = string("transpose_111")]; tensor var_2195_cast_fp16 = expand_dims(axes = var_2195_axes_0, x = var_2193_cast_fp16)[name = string("op_2195_cast_fp16")]; string BCx_11_pad_type_0 = const()[name = string("BCx_11_pad_type_0"), val = string("valid")]; tensor BCx_11_strides_0 = const()[name = string("BCx_11_strides_0"), val = tensor([1, 1])]; tensor BCx_11_pad_0 = const()[name = string("BCx_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor BCx_11_dilations_0 = const()[name = string("BCx_11_dilations_0"), val = tensor([1, 1])]; int32 BCx_11_groups_0 = const()[name = string("BCx_11_groups_0"), val = int32(1)]; tensor BCx_11 = conv(dilations = BCx_11_dilations_0, groups = BCx_11_groups_0, pad = BCx_11_pad_0, pad_type = BCx_11_pad_type_0, strides = BCx_11_strides_0, weight = layers_7_conv_in_proj_weight, x = var_2195_cast_fp16)[name = string("BCx_11")]; tensor var_2212_split_sizes_0 = const()[name = string("op_2212_split_sizes_0"), val = tensor([1024, 1024, 1024])]; int32 var_2212_axis_0 = const()[name = string("op_2212_axis_0"), val = int32(1)]; tensor var_2212_0, tensor var_2212_1, tensor var_2212_2 = split(axis = var_2212_axis_0, split_sizes = var_2212_split_sizes_0, x = BCx_11)[name = string("op_2212")]; tensor Bx_11 = mul(x = var_2212_0, y = var_2212_2)[name = string("Bx_11")]; tensor var_2218_begin_0 = const()[name = string("op_2218_begin_0"), val = tensor([5, 0, 0])]; tensor var_2218_end_0 = const()[name = string("op_2218_end_0"), val = tensor([6, 1024, 3])]; tensor var_2218_end_mask_0 = const()[name = string("op_2218_end_mask_0"), val = tensor([false, true, true])]; tensor var_2218_squeeze_mask_0 = const()[name = string("op_2218_squeeze_mask_0"), val = tensor([true, false, false])]; tensor var_2218_cast_fp16 = slice_by_index(begin = var_2218_begin_0, end = var_2218_end_0, end_mask = var_2218_end_mask_0, squeeze_mask = var_2218_squeeze_mask_0, x = conv_state_in)[name = string("op_2218_cast_fp16")]; tensor var_2220_axes_0 = const()[name = string("op_2220_axes_0"), val = tensor([0])]; tensor var_2220_cast_fp16 = expand_dims(axes = var_2220_axes_0, x = var_2218_cast_fp16)[name = string("op_2220_cast_fp16")]; tensor slot_11_axes_0 = const()[name = string("slot_11_axes_0"), val = tensor([2])]; tensor slot_11_cast_fp16 = expand_dims(axes = slot_11_axes_0, x = var_2220_cast_fp16)[name = string("slot_11_cast_fp16")]; tensor live_tail_11_begin_0 = const()[name = string("live_tail_11_begin_0"), val = tensor([0, 0, 0, 1])]; tensor live_tail_11_end_0 = const()[name = string("live_tail_11_end_0"), val = tensor([1, 1024, 1, 1])]; tensor live_tail_11_end_mask_0 = const()[name = string("live_tail_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor live_tail_11_cast_fp16 = slice_by_index(begin = live_tail_11_begin_0, end = live_tail_11_end_0, end_mask = live_tail_11_end_mask_0, x = slot_11_cast_fp16)[name = string("live_tail_11_cast_fp16")]; int32 var_2229 = const()[name = string("op_2229"), val = int32(-1)]; bool new_state_11_interleave_0 = const()[name = string("new_state_11_interleave_0"), val = bool(false)]; tensor new_state_11_cast_fp16 = concat(axis = var_2229, interleave = new_state_11_interleave_0, values = (live_tail_11_cast_fp16, Bx_11))[name = string("new_state_11_cast_fp16")]; tensor var_2232_axes_0 = const()[name = string("op_2232_axes_0"), val = tensor([0])]; tensor var_2232_cast_fp16 = squeeze(axes = var_2232_axes_0, x = new_state_11_cast_fp16)[name = string("op_2232_cast_fp16")]; tensor var_2234_axes_0 = const()[name = string("op_2234_axes_0"), val = tensor([1])]; tensor var_2234_cast_fp16 = squeeze(axes = var_2234_axes_0, x = var_2232_cast_fp16)[name = string("op_2234_cast_fp16")]; string conv_out_11_pad_type_0 = const()[name = string("conv_out_11_pad_type_0"), val = string("valid")]; int32 conv_out_11_groups_0 = const()[name = string("conv_out_11_groups_0"), val = int32(1024)]; tensor conv_out_11_strides_0 = const()[name = string("conv_out_11_strides_0"), val = tensor([1, 1])]; tensor conv_out_11_pad_0 = const()[name = string("conv_out_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor conv_out_11_dilations_0 = const()[name = string("conv_out_11_dilations_0"), val = tensor([1, 1])]; tensor layers_7_conv_conv_weight_promoted_to_fp16 = const()[name = string("layers_7_conv_conv_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(825299968)))]; tensor conv_out_11_cast_fp16 = conv(dilations = conv_out_11_dilations_0, groups = conv_out_11_groups_0, pad = conv_out_11_pad_0, pad_type = conv_out_11_pad_type_0, strides = conv_out_11_strides_0, weight = layers_7_conv_conv_weight_promoted_to_fp16, x = new_state_11_cast_fp16)[name = string("conv_out_11_cast_fp16")]; tensor input_111_cast_fp16 = mul(x = var_2212_1, y = conv_out_11_cast_fp16)[name = string("input_111_cast_fp16")]; string y_11_pad_type_0 = const()[name = string("y_11_pad_type_0"), val = string("valid")]; tensor y_11_strides_0 = const()[name = string("y_11_strides_0"), val = tensor([1, 1])]; tensor y_11_pad_0 = const()[name = string("y_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor y_11_dilations_0 = const()[name = string("y_11_dilations_0"), val = tensor([1, 1])]; int32 y_11_groups_0 = const()[name = string("y_11_groups_0"), val = int32(1)]; tensor layers_7_conv_out_proj_weight_promoted_to_fp16 = const()[name = string("layers_7_conv_out_proj_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(825306176)))]; tensor y_11_cast_fp16 = conv(dilations = y_11_dilations_0, groups = y_11_groups_0, pad = y_11_pad_0, pad_type = y_11_pad_type_0, strides = y_11_strides_0, weight = layers_7_conv_out_proj_weight_promoted_to_fp16, x = input_111_cast_fp16)[name = string("y_11_cast_fp16")]; tensor var_2260_axes_0 = const()[name = string("op_2260_axes_0"), val = tensor([2])]; tensor var_2260_cast_fp16 = squeeze(axes = var_2260_axes_0, x = y_11_cast_fp16)[name = string("op_2260_cast_fp16")]; tensor var_2264 = const()[name = string("op_2264"), val = tensor([0, 2, 1])]; tensor op_out_15_cast_fp16 = transpose(perm = var_2264, x = var_2260_cast_fp16)[name = string("transpose_110")]; tensor x_39_cast_fp16 = add(x = x_37_cast_fp16, y = op_out_15_cast_fp16)[name = string("x_39_cast_fp16")]; fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2268_cast_fp16 = mul(x = x_39_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_2268_cast_fp16")]; int32 var_2270 = const()[name = string("op_2270"), val = int32(-1)]; bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; tensor input_113_cast_fp16 = concat(axis = var_2270, interleave = input_113_interleave_0, values = (x_39_cast_fp16, var_2268_cast_fp16))[name = string("input_113_cast_fp16")]; tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; fp16 var_2276_to_fp16 = const()[name = string("op_2276_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_2276_to_fp16, x = input_113_cast_fp16)[name = string("normed_53_cast_fp16")]; tensor var_2279_split_sizes_0 = const()[name = string("op_2279_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2279_axis_0 = const()[name = string("op_2279_axis_0"), val = int32(-1)]; tensor var_2279_cast_fp16_0, tensor var_2279_cast_fp16_1 = split(axis = var_2279_axis_0, split_sizes = var_2279_split_sizes_0, x = normed_53_cast_fp16)[name = string("op_2279_cast_fp16")]; tensor layers_7_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_7_ffn_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(827403392)))]; tensor normed_55_cast_fp16 = mul(x = var_2279_cast_fp16_0, y = layers_7_ffn_norm_weight_promoted_to_fp16)[name = string("normed_55_cast_fp16")]; tensor var_2285 = const()[name = string("op_2285"), val = tensor([0, 2, 1])]; tensor var_2288_axes_0 = const()[name = string("op_2288_axes_0"), val = tensor([2])]; tensor var_2286_cast_fp16 = transpose(perm = var_2285, x = normed_55_cast_fp16)[name = string("transpose_109")]; tensor var_2288_cast_fp16 = expand_dims(axes = var_2288_axes_0, x = var_2286_cast_fp16)[name = string("op_2288_cast_fp16")]; string input_117_pad_type_0 = const()[name = string("input_117_pad_type_0"), val = string("valid")]; tensor input_117_strides_0 = const()[name = string("input_117_strides_0"), val = tensor([1, 1])]; tensor input_117_pad_0 = const()[name = string("input_117_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_117_dilations_0 = const()[name = string("input_117_dilations_0"), val = tensor([1, 1])]; int32 input_117_groups_0 = const()[name = string("input_117_groups_0"), val = int32(1)]; tensor input_117 = conv(dilations = input_117_dilations_0, groups = input_117_groups_0, pad = input_117_pad_0, pad_type = input_117_pad_type_0, strides = input_117_strides_0, weight = layers_7_feed_forward_w1_weight, x = var_2288_cast_fp16)[name = string("input_117")]; string b_15_pad_type_0 = const()[name = string("b_15_pad_type_0"), val = string("valid")]; tensor b_15_strides_0 = const()[name = string("b_15_strides_0"), val = tensor([1, 1])]; tensor b_15_pad_0 = const()[name = string("b_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_15_dilations_0 = const()[name = string("b_15_dilations_0"), val = tensor([1, 1])]; int32 b_15_groups_0 = const()[name = string("b_15_groups_0"), val = int32(1)]; tensor b_15 = conv(dilations = b_15_dilations_0, groups = b_15_groups_0, pad = b_15_pad_0, pad_type = b_15_pad_type_0, strides = b_15_strides_0, weight = layers_7_feed_forward_w3_weight, x = var_2288_cast_fp16)[name = string("b_15")]; tensor var_2316 = silu(x = input_117)[name = string("op_2316")]; tensor input_119 = mul(x = var_2316, y = b_15)[name = string("input_119")]; string mlp_29_pad_type_0 = const()[name = string("mlp_29_pad_type_0"), val = string("valid")]; tensor mlp_29_strides_0 = const()[name = string("mlp_29_strides_0"), val = tensor([1, 1])]; tensor mlp_29_pad_0 = const()[name = string("mlp_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_29_dilations_0 = const()[name = string("mlp_29_dilations_0"), val = tensor([1, 1])]; int32 mlp_29_groups_0 = const()[name = string("mlp_29_groups_0"), val = int32(1)]; tensor mlp_29 = conv(dilations = mlp_29_dilations_0, groups = mlp_29_groups_0, pad = mlp_29_pad_0, pad_type = mlp_29_pad_type_0, strides = mlp_29_strides_0, weight = layers_7_feed_forward_w2_weight, x = input_119)[name = string("mlp_29")]; tensor var_2330_axes_0 = const()[name = string("op_2330_axes_0"), val = tensor([2])]; tensor var_2330 = squeeze(axes = var_2330_axes_0, x = mlp_29)[name = string("op_2330")]; tensor var_2334 = const()[name = string("op_2334"), val = tensor([0, 2, 1])]; tensor mlp_31 = transpose(perm = var_2334, x = var_2330)[name = string("transpose_108")]; tensor x_41_cast_fp16 = add(x = x_39_cast_fp16, y = mlp_31)[name = string("x_41_cast_fp16")]; fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2338_cast_fp16 = mul(x = x_41_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_2338_cast_fp16")]; int32 var_2340 = const()[name = string("op_2340"), val = int32(-1)]; bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)]; tensor input_121_cast_fp16 = concat(axis = var_2340, interleave = input_121_interleave_0, values = (x_41_cast_fp16, var_2338_cast_fp16))[name = string("input_121_cast_fp16")]; tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; fp16 var_2346_to_fp16 = const()[name = string("op_2346_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_2346_to_fp16, x = input_121_cast_fp16)[name = string("normed_57_cast_fp16")]; tensor var_2349_split_sizes_0 = const()[name = string("op_2349_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2349_axis_0 = const()[name = string("op_2349_axis_0"), val = int32(-1)]; tensor var_2349_cast_fp16_0, tensor var_2349_cast_fp16_1 = split(axis = var_2349_axis_0, split_sizes = var_2349_split_sizes_0, x = normed_57_cast_fp16)[name = string("op_2349_cast_fp16")]; tensor layers_8_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_8_operator_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(827405504)))]; tensor hidden_states_17_cast_fp16 = mul(x = var_2349_cast_fp16_0, y = layers_8_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_17_cast_fp16")]; tensor var_2355 = const()[name = string("op_2355"), val = tensor([0, 2, 1])]; tensor var_2358_axes_0 = const()[name = string("op_2358_axes_0"), val = tensor([2])]; tensor var_2356_cast_fp16 = transpose(perm = var_2355, x = hidden_states_17_cast_fp16)[name = string("transpose_107")]; tensor var_2358_cast_fp16 = expand_dims(axes = var_2358_axes_0, x = var_2356_cast_fp16)[name = string("op_2358_cast_fp16")]; string var_2374_pad_type_0 = const()[name = string("op_2374_pad_type_0"), val = string("valid")]; tensor var_2374_strides_0 = const()[name = string("op_2374_strides_0"), val = tensor([1, 1])]; tensor var_2374_pad_0 = const()[name = string("op_2374_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2374_dilations_0 = const()[name = string("op_2374_dilations_0"), val = tensor([1, 1])]; int32 var_2374_groups_0 = const()[name = string("op_2374_groups_0"), val = int32(1)]; tensor var_2374 = conv(dilations = var_2374_dilations_0, groups = var_2374_groups_0, pad = var_2374_pad_0, pad_type = var_2374_pad_type_0, strides = var_2374_strides_0, weight = layers_8_self_attn_q_proj_weight, x = var_2358_cast_fp16)[name = string("op_2374")]; tensor var_2379 = const()[name = string("op_2379"), val = tensor([1, 16, 64, 1])]; tensor var_2380 = reshape(shape = var_2379, x = var_2374)[name = string("op_2380")]; tensor var_2385 = const()[name = string("op_2385"), val = tensor([0, 1, 3, 2])]; string var_2402_pad_type_0 = const()[name = string("op_2402_pad_type_0"), val = string("valid")]; tensor var_2402_strides_0 = const()[name = string("op_2402_strides_0"), val = tensor([1, 1])]; tensor var_2402_pad_0 = const()[name = string("op_2402_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2402_dilations_0 = const()[name = string("op_2402_dilations_0"), val = tensor([1, 1])]; int32 var_2402_groups_0 = const()[name = string("op_2402_groups_0"), val = int32(1)]; tensor var_2402 = conv(dilations = var_2402_dilations_0, groups = var_2402_groups_0, pad = var_2402_pad_0, pad_type = var_2402_pad_type_0, strides = var_2402_strides_0, weight = layers_8_self_attn_k_proj_weight, x = var_2358_cast_fp16)[name = string("op_2402")]; tensor var_2407 = const()[name = string("op_2407"), val = tensor([1, 8, 64, 1])]; tensor var_2408 = reshape(shape = var_2407, x = var_2402)[name = string("op_2408")]; tensor var_2413 = const()[name = string("op_2413"), val = tensor([0, 1, 3, 2])]; string var_2430_pad_type_0 = const()[name = string("op_2430_pad_type_0"), val = string("valid")]; tensor var_2430_strides_0 = const()[name = string("op_2430_strides_0"), val = tensor([1, 1])]; tensor var_2430_pad_0 = const()[name = string("op_2430_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2430_dilations_0 = const()[name = string("op_2430_dilations_0"), val = tensor([1, 1])]; int32 var_2430_groups_0 = const()[name = string("op_2430_groups_0"), val = int32(1)]; tensor var_2430 = conv(dilations = var_2430_dilations_0, groups = var_2430_groups_0, pad = var_2430_pad_0, pad_type = var_2430_pad_type_0, strides = var_2430_strides_0, weight = layers_8_self_attn_v_proj_weight, x = var_2358_cast_fp16)[name = string("op_2430")]; tensor var_2435 = const()[name = string("op_2435"), val = tensor([1, 8, 64, 1])]; tensor var_2436 = reshape(shape = var_2435, x = var_2430)[name = string("op_2436")]; tensor var_2441 = const()[name = string("op_2441"), val = tensor([0, 1, 3, 2])]; fp16 const_25_promoted = const()[name = string("const_25_promoted"), val = fp16(-0x1p+0)]; tensor var_2386 = transpose(perm = var_2385, x = var_2380)[name = string("transpose_106")]; tensor var_2448 = mul(x = var_2386, y = const_25_promoted)[name = string("op_2448")]; int32 var_2450 = const()[name = string("op_2450"), val = int32(-1)]; bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; tensor input_125 = concat(axis = var_2450, interleave = input_125_interleave_0, values = (var_2386, var_2448))[name = string("input_125")]; tensor normed_59_axes_0 = const()[name = string("normed_59_axes_0"), val = tensor([-1])]; fp16 var_2456_to_fp16 = const()[name = string("op_2456_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_59_cast_fp16 = layer_norm(axes = normed_59_axes_0, epsilon = var_2456_to_fp16, x = input_125)[name = string("normed_59_cast_fp16")]; tensor var_2459_split_sizes_0 = const()[name = string("op_2459_split_sizes_0"), val = tensor([64, 64])]; int32 var_2459_axis_0 = const()[name = string("op_2459_axis_0"), val = int32(-1)]; tensor var_2459_0, tensor var_2459_1 = split(axis = var_2459_axis_0, split_sizes = var_2459_split_sizes_0, x = normed_59_cast_fp16)[name = string("op_2459")]; tensor q_9 = mul(x = var_2459_0, y = layers_8_self_attn_q_layernorm_weight)[name = string("q_9")]; fp16 const_26_promoted = const()[name = string("const_26_promoted"), val = fp16(-0x1p+0)]; tensor var_2414 = transpose(perm = var_2413, x = var_2408)[name = string("transpose_105")]; tensor var_2462 = mul(x = var_2414, y = const_26_promoted)[name = string("op_2462")]; int32 var_2464 = const()[name = string("op_2464"), val = int32(-1)]; bool input_127_interleave_0 = const()[name = string("input_127_interleave_0"), val = bool(false)]; tensor input_127 = concat(axis = var_2464, interleave = input_127_interleave_0, values = (var_2414, var_2462))[name = string("input_127")]; tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; fp16 var_2470_to_fp16 = const()[name = string("op_2470_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_2470_to_fp16, x = input_127)[name = string("normed_61_cast_fp16")]; tensor var_2473_split_sizes_0 = const()[name = string("op_2473_split_sizes_0"), val = tensor([64, 64])]; int32 var_2473_axis_0 = const()[name = string("op_2473_axis_0"), val = int32(-1)]; tensor var_2473_0, tensor var_2473_1 = split(axis = var_2473_axis_0, split_sizes = var_2473_split_sizes_0, x = normed_61_cast_fp16)[name = string("op_2473")]; tensor k_9 = mul(x = var_2473_0, y = layers_8_self_attn_k_layernorm_weight)[name = string("k_9")]; tensor var_2476 = mul(x = q_9, y = cos)[name = string("op_2476")]; tensor var_2477_split_sizes_0 = const()[name = string("op_2477_split_sizes_0"), val = tensor([32, 32])]; int32 var_2477_axis_0 = const()[name = string("op_2477_axis_0"), val = int32(-1)]; tensor var_2477_0, tensor var_2477_1 = split(axis = var_2477_axis_0, split_sizes = var_2477_split_sizes_0, x = q_9)[name = string("op_2477")]; fp16 const_27_promoted = const()[name = string("const_27_promoted"), val = fp16(-0x1p+0)]; tensor var_2479 = mul(x = var_2477_1, y = const_27_promoted)[name = string("op_2479")]; int32 var_2481 = const()[name = string("op_2481"), val = int32(-1)]; bool var_2482_interleave_0 = const()[name = string("op_2482_interleave_0"), val = bool(false)]; tensor var_2482 = concat(axis = var_2481, interleave = var_2482_interleave_0, values = (var_2479, var_2477_0))[name = string("op_2482")]; tensor var_2483 = mul(x = var_2482, y = sin)[name = string("op_2483")]; tensor q_11 = add(x = var_2476, y = var_2483)[name = string("q_11")]; tensor var_2486 = mul(x = k_9, y = cos)[name = string("op_2486")]; tensor var_2487_split_sizes_0 = const()[name = string("op_2487_split_sizes_0"), val = tensor([32, 32])]; int32 var_2487_axis_0 = const()[name = string("op_2487_axis_0"), val = int32(-1)]; tensor var_2487_0, tensor var_2487_1 = split(axis = var_2487_axis_0, split_sizes = var_2487_split_sizes_0, x = k_9)[name = string("op_2487")]; fp16 const_28_promoted = const()[name = string("const_28_promoted"), val = fp16(-0x1p+0)]; tensor var_2489 = mul(x = var_2487_1, y = const_28_promoted)[name = string("op_2489")]; int32 var_2491 = const()[name = string("op_2491"), val = int32(-1)]; bool var_2492_interleave_0 = const()[name = string("op_2492_interleave_0"), val = bool(false)]; tensor var_2492 = concat(axis = var_2491, interleave = var_2492_interleave_0, values = (var_2489, var_2487_0))[name = string("op_2492")]; tensor var_2493 = mul(x = var_2492, y = sin)[name = string("op_2493")]; tensor k_11 = add(x = var_2486, y = var_2493)[name = string("k_11")]; tensor var_2498_begin_0 = const()[name = string("op_2498_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_2498_end_0 = const()[name = string("op_2498_end_0"), val = tensor([3, 8, 2048, 64])]; tensor var_2498_end_mask_0 = const()[name = string("op_2498_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2498_squeeze_mask_0 = const()[name = string("op_2498_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_2498_cast_fp16 = slice_by_index(begin = var_2498_begin_0, end = var_2498_end_0, end_mask = var_2498_end_mask_0, squeeze_mask = var_2498_squeeze_mask_0, x = coreml_update_state_15)[name = string("op_2498_cast_fp16")]; tensor K_cache_5_axes_0 = const()[name = string("K_cache_5_axes_0"), val = tensor([0])]; tensor K_cache_5_cast_fp16 = expand_dims(axes = K_cache_5_axes_0, x = var_2498_cast_fp16)[name = string("K_cache_5_cast_fp16")]; tensor var_2503_begin_0 = const()[name = string("op_2503_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_2503_end_0 = const()[name = string("op_2503_end_0"), val = tensor([9, 8, 2048, 64])]; tensor var_2503_end_mask_0 = const()[name = string("op_2503_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2503_squeeze_mask_0 = const()[name = string("op_2503_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_2503_cast_fp16 = slice_by_index(begin = var_2503_begin_0, end = var_2503_end_0, end_mask = var_2503_end_mask_0, squeeze_mask = var_2503_squeeze_mask_0, x = coreml_update_state_15)[name = string("op_2503_cast_fp16")]; tensor V_cache_5_axes_0 = const()[name = string("V_cache_5_axes_0"), val = tensor([0])]; tensor V_cache_5_cast_fp16 = expand_dims(axes = V_cache_5_axes_0, x = var_2503_cast_fp16)[name = string("V_cache_5_cast_fp16")]; tensor k_b_5_reps_0 = const()[name = string("k_b_5_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_b_5 = tile(reps = k_b_5_reps_0, x = k_11)[name = string("k_b_5")]; tensor v_b_5_reps_0 = const()[name = string("v_b_5_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor var_2442 = transpose(perm = var_2441, x = var_2436)[name = string("transpose_104")]; tensor v_b_5 = tile(reps = v_b_5_reps_0, x = var_2442)[name = string("v_b_5")]; tensor var_2511_cast_fp16 = mul(x = K_cache_5_cast_fp16, y = var_1132_cast_fp16)[name = string("op_2511_cast_fp16")]; tensor var_2512_cast_fp16 = mul(x = k_b_5, y = update_mask)[name = string("op_2512_cast_fp16")]; tensor K_new_5_cast_fp16 = add(x = var_2511_cast_fp16, y = var_2512_cast_fp16)[name = string("K_new_5_cast_fp16")]; tensor var_2518_cast_fp16 = mul(x = V_cache_5_cast_fp16, y = var_1132_cast_fp16)[name = string("op_2518_cast_fp16")]; tensor var_2519_cast_fp16 = mul(x = v_b_5, y = update_mask)[name = string("op_2519_cast_fp16")]; tensor V_new_5_cast_fp16 = add(x = var_2518_cast_fp16, y = var_2519_cast_fp16)[name = string("V_new_5_cast_fp16")]; tensor var_2523_axes_0 = const()[name = string("op_2523_axes_0"), val = tensor([0])]; tensor var_2523_cast_fp16 = squeeze(axes = var_2523_axes_0, x = K_new_5_cast_fp16)[name = string("op_2523_cast_fp16")]; tensor concat_16 = const()[name = string("concat_16"), val = tensor([2, 0, 0, 0])]; tensor concat_17 = const()[name = string("concat_17"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_16, begin_mask = kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_17, end_mask = kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_5_stride_0, update = var_2523_cast_fp16, x = coreml_update_state_15)[name = string("kv_cache_0_internal_tensor_assign_5_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_5_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_16_write_state")]; tensor coreml_update_state_16 = read_state(input = kv_cache_0)[name = string("coreml_update_state_16")]; tensor var_2530_axes_0 = const()[name = string("op_2530_axes_0"), val = tensor([0])]; tensor var_2530_cast_fp16 = squeeze(axes = var_2530_axes_0, x = V_new_5_cast_fp16)[name = string("op_2530_cast_fp16")]; tensor concat_18 = const()[name = string("concat_18"), val = tensor([8, 0, 0, 0])]; tensor concat_19 = const()[name = string("concat_19"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_18, begin_mask = kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_19, end_mask = kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_6_stride_0, update = var_2530_cast_fp16, x = coreml_update_state_16)[name = string("kv_cache_0_internal_tensor_assign_6_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_6_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_17_write_state")]; tensor coreml_update_state_17 = read_state(input = kv_cache_0)[name = string("coreml_update_state_17")]; tensor transpose_8_perm_0 = const()[name = string("transpose_8_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_4_reps_0 = const()[name = string("tile_4_reps_0"), val = tensor([2, 1, 1, 1])]; tensor transpose_8_cast_fp16 = transpose(perm = transpose_8_perm_0, x = K_new_5_cast_fp16)[name = string("transpose_103")]; tensor tile_4_cast_fp16 = tile(reps = tile_4_reps_0, x = transpose_8_cast_fp16)[name = string("tile_4_cast_fp16")]; tensor concat_20 = const()[name = string("concat_20"), val = tensor([2, 8, 1, 2048, 64])]; tensor reshape_8_cast_fp16 = reshape(shape = concat_20, x = tile_4_cast_fp16)[name = string("reshape_8_cast_fp16")]; tensor transpose_9_perm_0 = const()[name = string("transpose_9_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_21 = const()[name = string("concat_21"), val = tensor([-1, 1, 2048, 64])]; tensor transpose_9_cast_fp16 = transpose(perm = transpose_9_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_102")]; tensor reshape_9_cast_fp16 = reshape(shape = concat_21, x = transpose_9_cast_fp16)[name = string("reshape_9_cast_fp16")]; tensor transpose_26_perm_0 = const()[name = string("transpose_26_perm_0"), val = tensor([1, 0, 3, 2])]; tensor transpose_10_perm_0 = const()[name = string("transpose_10_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_5_reps_0 = const()[name = string("tile_5_reps_0"), val = tensor([2, 1, 1, 1])]; tensor transpose_10_cast_fp16 = transpose(perm = transpose_10_perm_0, x = V_new_5_cast_fp16)[name = string("transpose_100")]; tensor tile_5_cast_fp16 = tile(reps = tile_5_reps_0, x = transpose_10_cast_fp16)[name = string("tile_5_cast_fp16")]; tensor concat_22 = const()[name = string("concat_22"), val = tensor([2, 8, 1, 2048, 64])]; tensor reshape_10_cast_fp16 = reshape(shape = concat_22, x = tile_5_cast_fp16)[name = string("reshape_10_cast_fp16")]; tensor transpose_11_perm_0 = const()[name = string("transpose_11_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_23 = const()[name = string("concat_23"), val = tensor([-1, 1, 2048, 64])]; tensor transpose_11_cast_fp16 = transpose(perm = transpose_11_perm_0, x = reshape_10_cast_fp16)[name = string("transpose_99")]; tensor reshape_11_cast_fp16 = reshape(shape = concat_23, x = transpose_11_cast_fp16)[name = string("reshape_11_cast_fp16")]; tensor V_e_5_perm_0 = const()[name = string("V_e_5_perm_0"), val = tensor([1, 0, 2, 3])]; bool var_2557_transpose_x_0 = const()[name = string("op_2557_transpose_x_0"), val = bool(false)]; bool var_2557_transpose_y_0 = const()[name = string("op_2557_transpose_y_0"), val = bool(false)]; tensor transpose_26_cast_fp16 = transpose(perm = transpose_26_perm_0, x = reshape_9_cast_fp16)[name = string("transpose_101")]; tensor var_2557_cast_fp16 = matmul(transpose_x = var_2557_transpose_x_0, transpose_y = var_2557_transpose_y_0, x = q_11, y = transpose_26_cast_fp16)[name = string("op_2557_cast_fp16")]; fp16 var_2558_to_fp16 = const()[name = string("op_2558_to_fp16"), val = fp16(0x1p-3)]; tensor attn_13_cast_fp16 = mul(x = var_2557_cast_fp16, y = var_2558_to_fp16)[name = string("attn_13_cast_fp16")]; tensor attn_15_cast_fp16 = add(x = attn_13_cast_fp16, y = causal_mask)[name = string("attn_15_cast_fp16")]; int32 var_2567 = const()[name = string("op_2567"), val = int32(-1)]; tensor var_2569_cast_fp16 = softmax(axis = var_2567, x = attn_15_cast_fp16)[name = string("op_2569_cast_fp16")]; bool var_2585_transpose_x_0 = const()[name = string("op_2585_transpose_x_0"), val = bool(false)]; bool var_2585_transpose_y_0 = const()[name = string("op_2585_transpose_y_0"), val = bool(false)]; tensor V_e_5_cast_fp16 = transpose(perm = V_e_5_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_98")]; tensor var_2585_cast_fp16 = matmul(transpose_x = var_2585_transpose_x_0, transpose_y = var_2585_transpose_y_0, x = var_2569_cast_fp16, y = V_e_5_cast_fp16)[name = string("op_2585_cast_fp16")]; tensor var_2595 = const()[name = string("op_2595"), val = tensor([0, 2, 1, 3])]; tensor var_2602 = const()[name = string("op_2602"), val = tensor([1, 1, -1])]; tensor var_2596 = transpose(perm = var_2595, x = var_2585_cast_fp16)[name = string("transpose_97")]; tensor out_11 = reshape(shape = var_2602, x = var_2596)[name = string("out_11")]; tensor var_2607 = const()[name = string("op_2607"), val = tensor([0, 2, 1])]; tensor squeeze_2 = const()[name = string("squeeze_2"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(827407616)))]; string var_2623_pad_type_0 = const()[name = string("op_2623_pad_type_0"), val = string("valid")]; int32 var_2623_groups_0 = const()[name = string("op_2623_groups_0"), val = int32(1)]; tensor var_2623_strides_0 = const()[name = string("op_2623_strides_0"), val = tensor([1])]; tensor var_2623_pad_0 = const()[name = string("op_2623_pad_0"), val = tensor([0, 0])]; tensor var_2623_dilations_0 = const()[name = string("op_2623_dilations_0"), val = tensor([1])]; tensor var_2608 = transpose(perm = var_2607, x = out_11)[name = string("transpose_96")]; tensor var_2623 = conv(dilations = var_2623_dilations_0, groups = var_2623_groups_0, pad = var_2623_pad_0, pad_type = var_2623_pad_type_0, strides = var_2623_strides_0, weight = squeeze_2, x = var_2608)[name = string("op_2623")]; tensor var_2627 = const()[name = string("op_2627"), val = tensor([0, 2, 1])]; tensor op_out_17 = transpose(perm = var_2627, x = var_2623)[name = string("transpose_95")]; tensor x_47_cast_fp16 = add(x = x_41_cast_fp16, y = op_out_17)[name = string("x_47_cast_fp16")]; fp16 const_29_promoted_to_fp16 = const()[name = string("const_29_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2631_cast_fp16 = mul(x = x_47_cast_fp16, y = const_29_promoted_to_fp16)[name = string("op_2631_cast_fp16")]; int32 var_2633 = const()[name = string("op_2633"), val = int32(-1)]; bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; tensor input_131_cast_fp16 = concat(axis = var_2633, interleave = input_131_interleave_0, values = (x_47_cast_fp16, var_2631_cast_fp16))[name = string("input_131_cast_fp16")]; tensor normed_63_axes_0 = const()[name = string("normed_63_axes_0"), val = tensor([-1])]; fp16 var_2639_to_fp16 = const()[name = string("op_2639_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_63_cast_fp16 = layer_norm(axes = normed_63_axes_0, epsilon = var_2639_to_fp16, x = input_131_cast_fp16)[name = string("normed_63_cast_fp16")]; tensor var_2642_split_sizes_0 = const()[name = string("op_2642_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2642_axis_0 = const()[name = string("op_2642_axis_0"), val = int32(-1)]; tensor var_2642_cast_fp16_0, tensor var_2642_cast_fp16_1 = split(axis = var_2642_axis_0, split_sizes = var_2642_split_sizes_0, x = normed_63_cast_fp16)[name = string("op_2642_cast_fp16")]; tensor layers_8_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_8_ffn_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(829504832)))]; tensor normed_65_cast_fp16 = mul(x = var_2642_cast_fp16_0, y = layers_8_ffn_norm_weight_promoted_to_fp16)[name = string("normed_65_cast_fp16")]; tensor var_2648 = const()[name = string("op_2648"), val = tensor([0, 2, 1])]; tensor var_2651_axes_0 = const()[name = string("op_2651_axes_0"), val = tensor([2])]; tensor var_2649_cast_fp16 = transpose(perm = var_2648, x = normed_65_cast_fp16)[name = string("transpose_94")]; tensor var_2651_cast_fp16 = expand_dims(axes = var_2651_axes_0, x = var_2649_cast_fp16)[name = string("op_2651_cast_fp16")]; string input_135_pad_type_0 = const()[name = string("input_135_pad_type_0"), val = string("valid")]; tensor input_135_strides_0 = const()[name = string("input_135_strides_0"), val = tensor([1, 1])]; tensor input_135_pad_0 = const()[name = string("input_135_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_135_dilations_0 = const()[name = string("input_135_dilations_0"), val = tensor([1, 1])]; int32 input_135_groups_0 = const()[name = string("input_135_groups_0"), val = int32(1)]; tensor input_135 = conv(dilations = input_135_dilations_0, groups = input_135_groups_0, pad = input_135_pad_0, pad_type = input_135_pad_type_0, strides = input_135_strides_0, weight = layers_8_feed_forward_w1_weight, x = var_2651_cast_fp16)[name = string("input_135")]; string b_17_pad_type_0 = const()[name = string("b_17_pad_type_0"), val = string("valid")]; tensor b_17_strides_0 = const()[name = string("b_17_strides_0"), val = tensor([1, 1])]; tensor b_17_pad_0 = const()[name = string("b_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_17_dilations_0 = const()[name = string("b_17_dilations_0"), val = tensor([1, 1])]; int32 b_17_groups_0 = const()[name = string("b_17_groups_0"), val = int32(1)]; tensor b_17 = conv(dilations = b_17_dilations_0, groups = b_17_groups_0, pad = b_17_pad_0, pad_type = b_17_pad_type_0, strides = b_17_strides_0, weight = layers_8_feed_forward_w3_weight, x = var_2651_cast_fp16)[name = string("b_17")]; tensor var_2679 = silu(x = input_135)[name = string("op_2679")]; tensor input_137 = mul(x = var_2679, y = b_17)[name = string("input_137")]; string mlp_33_pad_type_0 = const()[name = string("mlp_33_pad_type_0"), val = string("valid")]; tensor mlp_33_strides_0 = const()[name = string("mlp_33_strides_0"), val = tensor([1, 1])]; tensor mlp_33_pad_0 = const()[name = string("mlp_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_33_dilations_0 = const()[name = string("mlp_33_dilations_0"), val = tensor([1, 1])]; int32 mlp_33_groups_0 = const()[name = string("mlp_33_groups_0"), val = int32(1)]; tensor mlp_33 = conv(dilations = mlp_33_dilations_0, groups = mlp_33_groups_0, pad = mlp_33_pad_0, pad_type = mlp_33_pad_type_0, strides = mlp_33_strides_0, weight = layers_8_feed_forward_w2_weight, x = input_137)[name = string("mlp_33")]; tensor var_2693_axes_0 = const()[name = string("op_2693_axes_0"), val = tensor([2])]; tensor var_2693 = squeeze(axes = var_2693_axes_0, x = mlp_33)[name = string("op_2693")]; tensor var_2697 = const()[name = string("op_2697"), val = tensor([0, 2, 1])]; tensor mlp_35 = transpose(perm = var_2697, x = var_2693)[name = string("transpose_93")]; tensor x_49_cast_fp16 = add(x = x_47_cast_fp16, y = mlp_35)[name = string("x_49_cast_fp16")]; fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2701_cast_fp16 = mul(x = x_49_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_2701_cast_fp16")]; int32 var_2703 = const()[name = string("op_2703"), val = int32(-1)]; bool input_139_interleave_0 = const()[name = string("input_139_interleave_0"), val = bool(false)]; tensor input_139_cast_fp16 = concat(axis = var_2703, interleave = input_139_interleave_0, values = (x_49_cast_fp16, var_2701_cast_fp16))[name = string("input_139_cast_fp16")]; tensor normed_67_axes_0 = const()[name = string("normed_67_axes_0"), val = tensor([-1])]; fp16 var_2709_to_fp16 = const()[name = string("op_2709_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_67_cast_fp16 = layer_norm(axes = normed_67_axes_0, epsilon = var_2709_to_fp16, x = input_139_cast_fp16)[name = string("normed_67_cast_fp16")]; tensor var_2712_split_sizes_0 = const()[name = string("op_2712_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2712_axis_0 = const()[name = string("op_2712_axis_0"), val = int32(-1)]; tensor var_2712_cast_fp16_0, tensor var_2712_cast_fp16_1 = split(axis = var_2712_axis_0, split_sizes = var_2712_split_sizes_0, x = normed_67_cast_fp16)[name = string("op_2712_cast_fp16")]; tensor layers_9_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_9_operator_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(829506944)))]; tensor hidden_states_19_cast_fp16 = mul(x = var_2712_cast_fp16_0, y = layers_9_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_19_cast_fp16")]; tensor var_2718 = const()[name = string("op_2718"), val = tensor([0, 2, 1])]; tensor var_2721_axes_0 = const()[name = string("op_2721_axes_0"), val = tensor([2])]; tensor var_2719_cast_fp16 = transpose(perm = var_2718, x = hidden_states_19_cast_fp16)[name = string("transpose_92")]; tensor var_2721_cast_fp16 = expand_dims(axes = var_2721_axes_0, x = var_2719_cast_fp16)[name = string("op_2721_cast_fp16")]; string BCx_13_pad_type_0 = const()[name = string("BCx_13_pad_type_0"), val = string("valid")]; tensor BCx_13_strides_0 = const()[name = string("BCx_13_strides_0"), val = tensor([1, 1])]; tensor BCx_13_pad_0 = const()[name = string("BCx_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor BCx_13_dilations_0 = const()[name = string("BCx_13_dilations_0"), val = tensor([1, 1])]; int32 BCx_13_groups_0 = const()[name = string("BCx_13_groups_0"), val = int32(1)]; tensor BCx_13 = conv(dilations = BCx_13_dilations_0, groups = BCx_13_groups_0, pad = BCx_13_pad_0, pad_type = BCx_13_pad_type_0, strides = BCx_13_strides_0, weight = layers_9_conv_in_proj_weight, x = var_2721_cast_fp16)[name = string("BCx_13")]; tensor var_2738_split_sizes_0 = const()[name = string("op_2738_split_sizes_0"), val = tensor([1024, 1024, 1024])]; int32 var_2738_axis_0 = const()[name = string("op_2738_axis_0"), val = int32(1)]; tensor var_2738_0, tensor var_2738_1, tensor var_2738_2 = split(axis = var_2738_axis_0, split_sizes = var_2738_split_sizes_0, x = BCx_13)[name = string("op_2738")]; tensor Bx_13 = mul(x = var_2738_0, y = var_2738_2)[name = string("Bx_13")]; tensor var_2744_begin_0 = const()[name = string("op_2744_begin_0"), val = tensor([6, 0, 0])]; tensor var_2744_end_0 = const()[name = string("op_2744_end_0"), val = tensor([7, 1024, 3])]; tensor var_2744_end_mask_0 = const()[name = string("op_2744_end_mask_0"), val = tensor([false, true, true])]; tensor var_2744_squeeze_mask_0 = const()[name = string("op_2744_squeeze_mask_0"), val = tensor([true, false, false])]; tensor var_2744_cast_fp16 = slice_by_index(begin = var_2744_begin_0, end = var_2744_end_0, end_mask = var_2744_end_mask_0, squeeze_mask = var_2744_squeeze_mask_0, x = conv_state_in)[name = string("op_2744_cast_fp16")]; tensor var_2746_axes_0 = const()[name = string("op_2746_axes_0"), val = tensor([0])]; tensor var_2746_cast_fp16 = expand_dims(axes = var_2746_axes_0, x = var_2744_cast_fp16)[name = string("op_2746_cast_fp16")]; tensor slot_13_axes_0 = const()[name = string("slot_13_axes_0"), val = tensor([2])]; tensor slot_13_cast_fp16 = expand_dims(axes = slot_13_axes_0, x = var_2746_cast_fp16)[name = string("slot_13_cast_fp16")]; tensor live_tail_13_begin_0 = const()[name = string("live_tail_13_begin_0"), val = tensor([0, 0, 0, 1])]; tensor live_tail_13_end_0 = const()[name = string("live_tail_13_end_0"), val = tensor([1, 1024, 1, 1])]; tensor live_tail_13_end_mask_0 = const()[name = string("live_tail_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor live_tail_13_cast_fp16 = slice_by_index(begin = live_tail_13_begin_0, end = live_tail_13_end_0, end_mask = live_tail_13_end_mask_0, x = slot_13_cast_fp16)[name = string("live_tail_13_cast_fp16")]; int32 var_2755 = const()[name = string("op_2755"), val = int32(-1)]; bool new_state_13_interleave_0 = const()[name = string("new_state_13_interleave_0"), val = bool(false)]; tensor new_state_13_cast_fp16 = concat(axis = var_2755, interleave = new_state_13_interleave_0, values = (live_tail_13_cast_fp16, Bx_13))[name = string("new_state_13_cast_fp16")]; tensor var_2758_axes_0 = const()[name = string("op_2758_axes_0"), val = tensor([0])]; tensor var_2758_cast_fp16 = squeeze(axes = var_2758_axes_0, x = new_state_13_cast_fp16)[name = string("op_2758_cast_fp16")]; tensor var_2760_axes_0 = const()[name = string("op_2760_axes_0"), val = tensor([1])]; tensor var_2760_cast_fp16 = squeeze(axes = var_2760_axes_0, x = var_2758_cast_fp16)[name = string("op_2760_cast_fp16")]; string conv_out_13_pad_type_0 = const()[name = string("conv_out_13_pad_type_0"), val = string("valid")]; int32 conv_out_13_groups_0 = const()[name = string("conv_out_13_groups_0"), val = int32(1024)]; tensor conv_out_13_strides_0 = const()[name = string("conv_out_13_strides_0"), val = tensor([1, 1])]; tensor conv_out_13_pad_0 = const()[name = string("conv_out_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor conv_out_13_dilations_0 = const()[name = string("conv_out_13_dilations_0"), val = tensor([1, 1])]; tensor layers_9_conv_conv_weight_promoted_to_fp16 = const()[name = string("layers_9_conv_conv_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(829509056)))]; tensor conv_out_13_cast_fp16 = conv(dilations = conv_out_13_dilations_0, groups = conv_out_13_groups_0, pad = conv_out_13_pad_0, pad_type = conv_out_13_pad_type_0, strides = conv_out_13_strides_0, weight = layers_9_conv_conv_weight_promoted_to_fp16, x = new_state_13_cast_fp16)[name = string("conv_out_13_cast_fp16")]; tensor input_143_cast_fp16 = mul(x = var_2738_1, y = conv_out_13_cast_fp16)[name = string("input_143_cast_fp16")]; string y_13_pad_type_0 = const()[name = string("y_13_pad_type_0"), val = string("valid")]; tensor y_13_strides_0 = const()[name = string("y_13_strides_0"), val = tensor([1, 1])]; tensor y_13_pad_0 = const()[name = string("y_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor y_13_dilations_0 = const()[name = string("y_13_dilations_0"), val = tensor([1, 1])]; int32 y_13_groups_0 = const()[name = string("y_13_groups_0"), val = int32(1)]; tensor layers_9_conv_out_proj_weight_promoted_to_fp16 = const()[name = string("layers_9_conv_out_proj_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(829515264)))]; tensor y_13_cast_fp16 = conv(dilations = y_13_dilations_0, groups = y_13_groups_0, pad = y_13_pad_0, pad_type = y_13_pad_type_0, strides = y_13_strides_0, weight = layers_9_conv_out_proj_weight_promoted_to_fp16, x = input_143_cast_fp16)[name = string("y_13_cast_fp16")]; tensor var_2786_axes_0 = const()[name = string("op_2786_axes_0"), val = tensor([2])]; tensor var_2786_cast_fp16 = squeeze(axes = var_2786_axes_0, x = y_13_cast_fp16)[name = string("op_2786_cast_fp16")]; tensor var_2790 = const()[name = string("op_2790"), val = tensor([0, 2, 1])]; tensor op_out_19_cast_fp16 = transpose(perm = var_2790, x = var_2786_cast_fp16)[name = string("transpose_91")]; tensor x_51_cast_fp16 = add(x = x_49_cast_fp16, y = op_out_19_cast_fp16)[name = string("x_51_cast_fp16")]; fp16 const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2794_cast_fp16 = mul(x = x_51_cast_fp16, y = const_31_promoted_to_fp16)[name = string("op_2794_cast_fp16")]; int32 var_2796 = const()[name = string("op_2796"), val = int32(-1)]; bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)]; tensor input_145_cast_fp16 = concat(axis = var_2796, interleave = input_145_interleave_0, values = (x_51_cast_fp16, var_2794_cast_fp16))[name = string("input_145_cast_fp16")]; tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; fp16 var_2802_to_fp16 = const()[name = string("op_2802_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_2802_to_fp16, x = input_145_cast_fp16)[name = string("normed_69_cast_fp16")]; tensor var_2805_split_sizes_0 = const()[name = string("op_2805_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2805_axis_0 = const()[name = string("op_2805_axis_0"), val = int32(-1)]; tensor var_2805_cast_fp16_0, tensor var_2805_cast_fp16_1 = split(axis = var_2805_axis_0, split_sizes = var_2805_split_sizes_0, x = normed_69_cast_fp16)[name = string("op_2805_cast_fp16")]; tensor layers_9_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_9_ffn_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(831612480)))]; tensor normed_71_cast_fp16 = mul(x = var_2805_cast_fp16_0, y = layers_9_ffn_norm_weight_promoted_to_fp16)[name = string("normed_71_cast_fp16")]; tensor var_2811 = const()[name = string("op_2811"), val = tensor([0, 2, 1])]; tensor var_2814_axes_0 = const()[name = string("op_2814_axes_0"), val = tensor([2])]; tensor var_2812_cast_fp16 = transpose(perm = var_2811, x = normed_71_cast_fp16)[name = string("transpose_90")]; tensor var_2814_cast_fp16 = expand_dims(axes = var_2814_axes_0, x = var_2812_cast_fp16)[name = string("op_2814_cast_fp16")]; string input_149_pad_type_0 = const()[name = string("input_149_pad_type_0"), val = string("valid")]; tensor input_149_strides_0 = const()[name = string("input_149_strides_0"), val = tensor([1, 1])]; tensor input_149_pad_0 = const()[name = string("input_149_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_149_dilations_0 = const()[name = string("input_149_dilations_0"), val = tensor([1, 1])]; int32 input_149_groups_0 = const()[name = string("input_149_groups_0"), val = int32(1)]; tensor input_149 = conv(dilations = input_149_dilations_0, groups = input_149_groups_0, pad = input_149_pad_0, pad_type = input_149_pad_type_0, strides = input_149_strides_0, weight = layers_9_feed_forward_w1_weight, x = var_2814_cast_fp16)[name = string("input_149")]; string b_19_pad_type_0 = const()[name = string("b_19_pad_type_0"), val = string("valid")]; tensor b_19_strides_0 = const()[name = string("b_19_strides_0"), val = tensor([1, 1])]; tensor b_19_pad_0 = const()[name = string("b_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_19_dilations_0 = const()[name = string("b_19_dilations_0"), val = tensor([1, 1])]; int32 b_19_groups_0 = const()[name = string("b_19_groups_0"), val = int32(1)]; tensor b_19 = conv(dilations = b_19_dilations_0, groups = b_19_groups_0, pad = b_19_pad_0, pad_type = b_19_pad_type_0, strides = b_19_strides_0, weight = layers_9_feed_forward_w3_weight, x = var_2814_cast_fp16)[name = string("b_19")]; tensor var_2842 = silu(x = input_149)[name = string("op_2842")]; tensor input_151 = mul(x = var_2842, y = b_19)[name = string("input_151")]; string mlp_37_pad_type_0 = const()[name = string("mlp_37_pad_type_0"), val = string("valid")]; tensor mlp_37_strides_0 = const()[name = string("mlp_37_strides_0"), val = tensor([1, 1])]; tensor mlp_37_pad_0 = const()[name = string("mlp_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_37_dilations_0 = const()[name = string("mlp_37_dilations_0"), val = tensor([1, 1])]; int32 mlp_37_groups_0 = const()[name = string("mlp_37_groups_0"), val = int32(1)]; tensor mlp_37 = conv(dilations = mlp_37_dilations_0, groups = mlp_37_groups_0, pad = mlp_37_pad_0, pad_type = mlp_37_pad_type_0, strides = mlp_37_strides_0, weight = layers_9_feed_forward_w2_weight, x = input_151)[name = string("mlp_37")]; tensor var_2856_axes_0 = const()[name = string("op_2856_axes_0"), val = tensor([2])]; tensor var_2856 = squeeze(axes = var_2856_axes_0, x = mlp_37)[name = string("op_2856")]; tensor var_2860 = const()[name = string("op_2860"), val = tensor([0, 2, 1])]; tensor mlp_39 = transpose(perm = var_2860, x = var_2856)[name = string("transpose_89")]; tensor x_53_cast_fp16 = add(x = x_51_cast_fp16, y = mlp_39)[name = string("x_53_cast_fp16")]; fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2864_cast_fp16 = mul(x = x_53_cast_fp16, y = const_32_promoted_to_fp16)[name = string("op_2864_cast_fp16")]; int32 var_2866 = const()[name = string("op_2866"), val = int32(-1)]; bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)]; tensor input_153_cast_fp16 = concat(axis = var_2866, interleave = input_153_interleave_0, values = (x_53_cast_fp16, var_2864_cast_fp16))[name = string("input_153_cast_fp16")]; tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; fp16 var_2872_to_fp16 = const()[name = string("op_2872_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_2872_to_fp16, x = input_153_cast_fp16)[name = string("normed_73_cast_fp16")]; tensor var_2875_split_sizes_0 = const()[name = string("op_2875_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_2875_axis_0 = const()[name = string("op_2875_axis_0"), val = int32(-1)]; tensor var_2875_cast_fp16_0, tensor var_2875_cast_fp16_1 = split(axis = var_2875_axis_0, split_sizes = var_2875_split_sizes_0, x = normed_73_cast_fp16)[name = string("op_2875_cast_fp16")]; tensor layers_10_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_10_operator_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(831614592)))]; tensor hidden_states_21_cast_fp16 = mul(x = var_2875_cast_fp16_0, y = layers_10_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_21_cast_fp16")]; tensor var_2881 = const()[name = string("op_2881"), val = tensor([0, 2, 1])]; tensor var_2884_axes_0 = const()[name = string("op_2884_axes_0"), val = tensor([2])]; tensor var_2882_cast_fp16 = transpose(perm = var_2881, x = hidden_states_21_cast_fp16)[name = string("transpose_88")]; tensor var_2884_cast_fp16 = expand_dims(axes = var_2884_axes_0, x = var_2882_cast_fp16)[name = string("op_2884_cast_fp16")]; string var_2900_pad_type_0 = const()[name = string("op_2900_pad_type_0"), val = string("valid")]; tensor var_2900_strides_0 = const()[name = string("op_2900_strides_0"), val = tensor([1, 1])]; tensor var_2900_pad_0 = const()[name = string("op_2900_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2900_dilations_0 = const()[name = string("op_2900_dilations_0"), val = tensor([1, 1])]; int32 var_2900_groups_0 = const()[name = string("op_2900_groups_0"), val = int32(1)]; tensor var_2900 = conv(dilations = var_2900_dilations_0, groups = var_2900_groups_0, pad = var_2900_pad_0, pad_type = var_2900_pad_type_0, strides = var_2900_strides_0, weight = layers_10_self_attn_q_proj_weight, x = var_2884_cast_fp16)[name = string("op_2900")]; tensor var_2905 = const()[name = string("op_2905"), val = tensor([1, 16, 64, 1])]; tensor var_2906 = reshape(shape = var_2905, x = var_2900)[name = string("op_2906")]; tensor var_2911 = const()[name = string("op_2911"), val = tensor([0, 1, 3, 2])]; string var_2928_pad_type_0 = const()[name = string("op_2928_pad_type_0"), val = string("valid")]; tensor var_2928_strides_0 = const()[name = string("op_2928_strides_0"), val = tensor([1, 1])]; tensor var_2928_pad_0 = const()[name = string("op_2928_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2928_dilations_0 = const()[name = string("op_2928_dilations_0"), val = tensor([1, 1])]; int32 var_2928_groups_0 = const()[name = string("op_2928_groups_0"), val = int32(1)]; tensor var_2928 = conv(dilations = var_2928_dilations_0, groups = var_2928_groups_0, pad = var_2928_pad_0, pad_type = var_2928_pad_type_0, strides = var_2928_strides_0, weight = layers_10_self_attn_k_proj_weight, x = var_2884_cast_fp16)[name = string("op_2928")]; tensor var_2933 = const()[name = string("op_2933"), val = tensor([1, 8, 64, 1])]; tensor var_2934 = reshape(shape = var_2933, x = var_2928)[name = string("op_2934")]; tensor var_2939 = const()[name = string("op_2939"), val = tensor([0, 1, 3, 2])]; string var_2956_pad_type_0 = const()[name = string("op_2956_pad_type_0"), val = string("valid")]; tensor var_2956_strides_0 = const()[name = string("op_2956_strides_0"), val = tensor([1, 1])]; tensor var_2956_pad_0 = const()[name = string("op_2956_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2956_dilations_0 = const()[name = string("op_2956_dilations_0"), val = tensor([1, 1])]; int32 var_2956_groups_0 = const()[name = string("op_2956_groups_0"), val = int32(1)]; tensor var_2956 = conv(dilations = var_2956_dilations_0, groups = var_2956_groups_0, pad = var_2956_pad_0, pad_type = var_2956_pad_type_0, strides = var_2956_strides_0, weight = layers_10_self_attn_v_proj_weight, x = var_2884_cast_fp16)[name = string("op_2956")]; tensor var_2961 = const()[name = string("op_2961"), val = tensor([1, 8, 64, 1])]; tensor var_2962 = reshape(shape = var_2961, x = var_2956)[name = string("op_2962")]; tensor var_2967 = const()[name = string("op_2967"), val = tensor([0, 1, 3, 2])]; fp16 const_33_promoted = const()[name = string("const_33_promoted"), val = fp16(-0x1p+0)]; tensor var_2912 = transpose(perm = var_2911, x = var_2906)[name = string("transpose_87")]; tensor var_2974 = mul(x = var_2912, y = const_33_promoted)[name = string("op_2974")]; int32 var_2976 = const()[name = string("op_2976"), val = int32(-1)]; bool input_157_interleave_0 = const()[name = string("input_157_interleave_0"), val = bool(false)]; tensor input_157 = concat(axis = var_2976, interleave = input_157_interleave_0, values = (var_2912, var_2974))[name = string("input_157")]; tensor normed_75_axes_0 = const()[name = string("normed_75_axes_0"), val = tensor([-1])]; fp16 var_2982_to_fp16 = const()[name = string("op_2982_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_75_cast_fp16 = layer_norm(axes = normed_75_axes_0, epsilon = var_2982_to_fp16, x = input_157)[name = string("normed_75_cast_fp16")]; tensor var_2985_split_sizes_0 = const()[name = string("op_2985_split_sizes_0"), val = tensor([64, 64])]; int32 var_2985_axis_0 = const()[name = string("op_2985_axis_0"), val = int32(-1)]; tensor var_2985_0, tensor var_2985_1 = split(axis = var_2985_axis_0, split_sizes = var_2985_split_sizes_0, x = normed_75_cast_fp16)[name = string("op_2985")]; tensor q_13 = mul(x = var_2985_0, y = layers_10_self_attn_q_layernorm_weight)[name = string("q_13")]; fp16 const_34_promoted = const()[name = string("const_34_promoted"), val = fp16(-0x1p+0)]; tensor var_2940 = transpose(perm = var_2939, x = var_2934)[name = string("transpose_86")]; tensor var_2988 = mul(x = var_2940, y = const_34_promoted)[name = string("op_2988")]; int32 var_2990 = const()[name = string("op_2990"), val = int32(-1)]; bool input_159_interleave_0 = const()[name = string("input_159_interleave_0"), val = bool(false)]; tensor input_159 = concat(axis = var_2990, interleave = input_159_interleave_0, values = (var_2940, var_2988))[name = string("input_159")]; tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; fp16 var_2996_to_fp16 = const()[name = string("op_2996_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_2996_to_fp16, x = input_159)[name = string("normed_77_cast_fp16")]; tensor var_2999_split_sizes_0 = const()[name = string("op_2999_split_sizes_0"), val = tensor([64, 64])]; int32 var_2999_axis_0 = const()[name = string("op_2999_axis_0"), val = int32(-1)]; tensor var_2999_0, tensor var_2999_1 = split(axis = var_2999_axis_0, split_sizes = var_2999_split_sizes_0, x = normed_77_cast_fp16)[name = string("op_2999")]; tensor k_13 = mul(x = var_2999_0, y = layers_10_self_attn_k_layernorm_weight)[name = string("k_13")]; tensor var_3002 = mul(x = q_13, y = cos)[name = string("op_3002")]; tensor var_3003_split_sizes_0 = const()[name = string("op_3003_split_sizes_0"), val = tensor([32, 32])]; int32 var_3003_axis_0 = const()[name = string("op_3003_axis_0"), val = int32(-1)]; tensor var_3003_0, tensor var_3003_1 = split(axis = var_3003_axis_0, split_sizes = var_3003_split_sizes_0, x = q_13)[name = string("op_3003")]; fp16 const_35_promoted = const()[name = string("const_35_promoted"), val = fp16(-0x1p+0)]; tensor var_3005 = mul(x = var_3003_1, y = const_35_promoted)[name = string("op_3005")]; int32 var_3007 = const()[name = string("op_3007"), val = int32(-1)]; bool var_3008_interleave_0 = const()[name = string("op_3008_interleave_0"), val = bool(false)]; tensor var_3008 = concat(axis = var_3007, interleave = var_3008_interleave_0, values = (var_3005, var_3003_0))[name = string("op_3008")]; tensor var_3009 = mul(x = var_3008, y = sin)[name = string("op_3009")]; tensor q_15 = add(x = var_3002, y = var_3009)[name = string("q_15")]; tensor var_3012 = mul(x = k_13, y = cos)[name = string("op_3012")]; tensor var_3013_split_sizes_0 = const()[name = string("op_3013_split_sizes_0"), val = tensor([32, 32])]; int32 var_3013_axis_0 = const()[name = string("op_3013_axis_0"), val = int32(-1)]; tensor var_3013_0, tensor var_3013_1 = split(axis = var_3013_axis_0, split_sizes = var_3013_split_sizes_0, x = k_13)[name = string("op_3013")]; fp16 const_36_promoted = const()[name = string("const_36_promoted"), val = fp16(-0x1p+0)]; tensor var_3015 = mul(x = var_3013_1, y = const_36_promoted)[name = string("op_3015")]; int32 var_3017 = const()[name = string("op_3017"), val = int32(-1)]; bool var_3018_interleave_0 = const()[name = string("op_3018_interleave_0"), val = bool(false)]; tensor var_3018 = concat(axis = var_3017, interleave = var_3018_interleave_0, values = (var_3015, var_3013_0))[name = string("op_3018")]; tensor var_3019 = mul(x = var_3018, y = sin)[name = string("op_3019")]; tensor k_15 = add(x = var_3012, y = var_3019)[name = string("k_15")]; tensor var_3024_begin_0 = const()[name = string("op_3024_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_3024_end_0 = const()[name = string("op_3024_end_0"), val = tensor([4, 8, 2048, 64])]; tensor var_3024_end_mask_0 = const()[name = string("op_3024_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3024_squeeze_mask_0 = const()[name = string("op_3024_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_3024_cast_fp16 = slice_by_index(begin = var_3024_begin_0, end = var_3024_end_0, end_mask = var_3024_end_mask_0, squeeze_mask = var_3024_squeeze_mask_0, x = coreml_update_state_17)[name = string("op_3024_cast_fp16")]; tensor K_cache_7_axes_0 = const()[name = string("K_cache_7_axes_0"), val = tensor([0])]; tensor K_cache_7_cast_fp16 = expand_dims(axes = K_cache_7_axes_0, x = var_3024_cast_fp16)[name = string("K_cache_7_cast_fp16")]; tensor var_3029_begin_0 = const()[name = string("op_3029_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_3029_end_0 = const()[name = string("op_3029_end_0"), val = tensor([10, 8, 2048, 64])]; tensor var_3029_end_mask_0 = const()[name = string("op_3029_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3029_squeeze_mask_0 = const()[name = string("op_3029_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_3029_cast_fp16 = slice_by_index(begin = var_3029_begin_0, end = var_3029_end_0, end_mask = var_3029_end_mask_0, squeeze_mask = var_3029_squeeze_mask_0, x = coreml_update_state_17)[name = string("op_3029_cast_fp16")]; tensor V_cache_7_axes_0 = const()[name = string("V_cache_7_axes_0"), val = tensor([0])]; tensor V_cache_7_cast_fp16 = expand_dims(axes = V_cache_7_axes_0, x = var_3029_cast_fp16)[name = string("V_cache_7_cast_fp16")]; tensor k_b_7_reps_0 = const()[name = string("k_b_7_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_b_7 = tile(reps = k_b_7_reps_0, x = k_15)[name = string("k_b_7")]; tensor v_b_7_reps_0 = const()[name = string("v_b_7_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor var_2968 = transpose(perm = var_2967, x = var_2962)[name = string("transpose_85")]; tensor v_b_7 = tile(reps = v_b_7_reps_0, x = var_2968)[name = string("v_b_7")]; tensor var_3037_cast_fp16 = mul(x = K_cache_7_cast_fp16, y = var_1132_cast_fp16)[name = string("op_3037_cast_fp16")]; tensor var_3038_cast_fp16 = mul(x = k_b_7, y = update_mask)[name = string("op_3038_cast_fp16")]; tensor K_new_7_cast_fp16 = add(x = var_3037_cast_fp16, y = var_3038_cast_fp16)[name = string("K_new_7_cast_fp16")]; tensor var_3044_cast_fp16 = mul(x = V_cache_7_cast_fp16, y = var_1132_cast_fp16)[name = string("op_3044_cast_fp16")]; tensor var_3045_cast_fp16 = mul(x = v_b_7, y = update_mask)[name = string("op_3045_cast_fp16")]; tensor V_new_7_cast_fp16 = add(x = var_3044_cast_fp16, y = var_3045_cast_fp16)[name = string("V_new_7_cast_fp16")]; tensor var_3049_axes_0 = const()[name = string("op_3049_axes_0"), val = tensor([0])]; tensor var_3049_cast_fp16 = squeeze(axes = var_3049_axes_0, x = K_new_7_cast_fp16)[name = string("op_3049_cast_fp16")]; tensor concat_24 = const()[name = string("concat_24"), val = tensor([3, 0, 0, 0])]; tensor concat_25 = const()[name = string("concat_25"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_24, begin_mask = kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_25, end_mask = kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_7_stride_0, update = var_3049_cast_fp16, x = coreml_update_state_17)[name = string("kv_cache_0_internal_tensor_assign_7_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_7_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_18_write_state")]; tensor coreml_update_state_18 = read_state(input = kv_cache_0)[name = string("coreml_update_state_18")]; tensor var_3056_axes_0 = const()[name = string("op_3056_axes_0"), val = tensor([0])]; tensor var_3056_cast_fp16 = squeeze(axes = var_3056_axes_0, x = V_new_7_cast_fp16)[name = string("op_3056_cast_fp16")]; tensor concat_26 = const()[name = string("concat_26"), val = tensor([9, 0, 0, 0])]; tensor concat_27 = const()[name = string("concat_27"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_26, begin_mask = kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_27, end_mask = kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_8_stride_0, update = var_3056_cast_fp16, x = coreml_update_state_18)[name = string("kv_cache_0_internal_tensor_assign_8_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_8_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_19_write_state")]; tensor coreml_update_state_19 = read_state(input = kv_cache_0)[name = string("coreml_update_state_19")]; tensor transpose_12_perm_0 = const()[name = string("transpose_12_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_6_reps_0 = const()[name = string("tile_6_reps_0"), val = tensor([2, 1, 1, 1])]; tensor transpose_12_cast_fp16 = transpose(perm = transpose_12_perm_0, x = K_new_7_cast_fp16)[name = string("transpose_84")]; tensor tile_6_cast_fp16 = tile(reps = tile_6_reps_0, x = transpose_12_cast_fp16)[name = string("tile_6_cast_fp16")]; tensor concat_28 = const()[name = string("concat_28"), val = tensor([2, 8, 1, 2048, 64])]; tensor reshape_12_cast_fp16 = reshape(shape = concat_28, x = tile_6_cast_fp16)[name = string("reshape_12_cast_fp16")]; tensor transpose_13_perm_0 = const()[name = string("transpose_13_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_29 = const()[name = string("concat_29"), val = tensor([-1, 1, 2048, 64])]; tensor transpose_13_cast_fp16 = transpose(perm = transpose_13_perm_0, x = reshape_12_cast_fp16)[name = string("transpose_83")]; tensor reshape_13_cast_fp16 = reshape(shape = concat_29, x = transpose_13_cast_fp16)[name = string("reshape_13_cast_fp16")]; tensor transpose_27_perm_0 = const()[name = string("transpose_27_perm_0"), val = tensor([1, 0, 3, 2])]; tensor transpose_14_perm_0 = const()[name = string("transpose_14_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_7_reps_0 = const()[name = string("tile_7_reps_0"), val = tensor([2, 1, 1, 1])]; tensor transpose_14_cast_fp16 = transpose(perm = transpose_14_perm_0, x = V_new_7_cast_fp16)[name = string("transpose_81")]; tensor tile_7_cast_fp16 = tile(reps = tile_7_reps_0, x = transpose_14_cast_fp16)[name = string("tile_7_cast_fp16")]; tensor concat_30 = const()[name = string("concat_30"), val = tensor([2, 8, 1, 2048, 64])]; tensor reshape_14_cast_fp16 = reshape(shape = concat_30, x = tile_7_cast_fp16)[name = string("reshape_14_cast_fp16")]; tensor transpose_15_perm_0 = const()[name = string("transpose_15_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_31 = const()[name = string("concat_31"), val = tensor([-1, 1, 2048, 64])]; tensor transpose_15_cast_fp16 = transpose(perm = transpose_15_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_80")]; tensor reshape_15_cast_fp16 = reshape(shape = concat_31, x = transpose_15_cast_fp16)[name = string("reshape_15_cast_fp16")]; tensor V_e_7_perm_0 = const()[name = string("V_e_7_perm_0"), val = tensor([1, 0, 2, 3])]; bool var_3083_transpose_x_0 = const()[name = string("op_3083_transpose_x_0"), val = bool(false)]; bool var_3083_transpose_y_0 = const()[name = string("op_3083_transpose_y_0"), val = bool(false)]; tensor transpose_27_cast_fp16 = transpose(perm = transpose_27_perm_0, x = reshape_13_cast_fp16)[name = string("transpose_82")]; tensor var_3083_cast_fp16 = matmul(transpose_x = var_3083_transpose_x_0, transpose_y = var_3083_transpose_y_0, x = q_15, y = transpose_27_cast_fp16)[name = string("op_3083_cast_fp16")]; fp16 var_3084_to_fp16 = const()[name = string("op_3084_to_fp16"), val = fp16(0x1p-3)]; tensor attn_19_cast_fp16 = mul(x = var_3083_cast_fp16, y = var_3084_to_fp16)[name = string("attn_19_cast_fp16")]; tensor attn_21_cast_fp16 = add(x = attn_19_cast_fp16, y = causal_mask)[name = string("attn_21_cast_fp16")]; int32 var_3093 = const()[name = string("op_3093"), val = int32(-1)]; tensor var_3095_cast_fp16 = softmax(axis = var_3093, x = attn_21_cast_fp16)[name = string("op_3095_cast_fp16")]; bool var_3111_transpose_x_0 = const()[name = string("op_3111_transpose_x_0"), val = bool(false)]; bool var_3111_transpose_y_0 = const()[name = string("op_3111_transpose_y_0"), val = bool(false)]; tensor V_e_7_cast_fp16 = transpose(perm = V_e_7_perm_0, x = reshape_15_cast_fp16)[name = string("transpose_79")]; tensor var_3111_cast_fp16 = matmul(transpose_x = var_3111_transpose_x_0, transpose_y = var_3111_transpose_y_0, x = var_3095_cast_fp16, y = V_e_7_cast_fp16)[name = string("op_3111_cast_fp16")]; tensor var_3121 = const()[name = string("op_3121"), val = tensor([0, 2, 1, 3])]; tensor var_3128 = const()[name = string("op_3128"), val = tensor([1, 1, -1])]; tensor var_3122 = transpose(perm = var_3121, x = var_3111_cast_fp16)[name = string("transpose_78")]; tensor out_15 = reshape(shape = var_3128, x = var_3122)[name = string("out_15")]; tensor var_3133 = const()[name = string("op_3133"), val = tensor([0, 2, 1])]; tensor squeeze_3 = const()[name = string("squeeze_3"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(831616704)))]; string var_3149_pad_type_0 = const()[name = string("op_3149_pad_type_0"), val = string("valid")]; int32 var_3149_groups_0 = const()[name = string("op_3149_groups_0"), val = int32(1)]; tensor var_3149_strides_0 = const()[name = string("op_3149_strides_0"), val = tensor([1])]; tensor var_3149_pad_0 = const()[name = string("op_3149_pad_0"), val = tensor([0, 0])]; tensor var_3149_dilations_0 = const()[name = string("op_3149_dilations_0"), val = tensor([1])]; tensor var_3134 = transpose(perm = var_3133, x = out_15)[name = string("transpose_77")]; tensor var_3149 = conv(dilations = var_3149_dilations_0, groups = var_3149_groups_0, pad = var_3149_pad_0, pad_type = var_3149_pad_type_0, strides = var_3149_strides_0, weight = squeeze_3, x = var_3134)[name = string("op_3149")]; tensor var_3153 = const()[name = string("op_3153"), val = tensor([0, 2, 1])]; tensor op_out_21 = transpose(perm = var_3153, x = var_3149)[name = string("transpose_76")]; tensor x_59_cast_fp16 = add(x = x_53_cast_fp16, y = op_out_21)[name = string("x_59_cast_fp16")]; fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3157_cast_fp16 = mul(x = x_59_cast_fp16, y = const_37_promoted_to_fp16)[name = string("op_3157_cast_fp16")]; int32 var_3159 = const()[name = string("op_3159"), val = int32(-1)]; bool input_163_interleave_0 = const()[name = string("input_163_interleave_0"), val = bool(false)]; tensor input_163_cast_fp16 = concat(axis = var_3159, interleave = input_163_interleave_0, values = (x_59_cast_fp16, var_3157_cast_fp16))[name = string("input_163_cast_fp16")]; tensor normed_79_axes_0 = const()[name = string("normed_79_axes_0"), val = tensor([-1])]; fp16 var_3165_to_fp16 = const()[name = string("op_3165_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_79_cast_fp16 = layer_norm(axes = normed_79_axes_0, epsilon = var_3165_to_fp16, x = input_163_cast_fp16)[name = string("normed_79_cast_fp16")]; tensor var_3168_split_sizes_0 = const()[name = string("op_3168_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3168_axis_0 = const()[name = string("op_3168_axis_0"), val = int32(-1)]; tensor var_3168_cast_fp16_0, tensor var_3168_cast_fp16_1 = split(axis = var_3168_axis_0, split_sizes = var_3168_split_sizes_0, x = normed_79_cast_fp16)[name = string("op_3168_cast_fp16")]; tensor layers_10_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_10_ffn_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833713920)))]; tensor normed_81_cast_fp16 = mul(x = var_3168_cast_fp16_0, y = layers_10_ffn_norm_weight_promoted_to_fp16)[name = string("normed_81_cast_fp16")]; tensor var_3174 = const()[name = string("op_3174"), val = tensor([0, 2, 1])]; tensor var_3177_axes_0 = const()[name = string("op_3177_axes_0"), val = tensor([2])]; tensor var_3175_cast_fp16 = transpose(perm = var_3174, x = normed_81_cast_fp16)[name = string("transpose_75")]; tensor var_3177_cast_fp16 = expand_dims(axes = var_3177_axes_0, x = var_3175_cast_fp16)[name = string("op_3177_cast_fp16")]; string input_167_pad_type_0 = const()[name = string("input_167_pad_type_0"), val = string("valid")]; tensor input_167_strides_0 = const()[name = string("input_167_strides_0"), val = tensor([1, 1])]; tensor input_167_pad_0 = const()[name = string("input_167_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_167_dilations_0 = const()[name = string("input_167_dilations_0"), val = tensor([1, 1])]; int32 input_167_groups_0 = const()[name = string("input_167_groups_0"), val = int32(1)]; tensor input_167 = conv(dilations = input_167_dilations_0, groups = input_167_groups_0, pad = input_167_pad_0, pad_type = input_167_pad_type_0, strides = input_167_strides_0, weight = layers_10_feed_forward_w1_weight, x = var_3177_cast_fp16)[name = string("input_167")]; string b_21_pad_type_0 = const()[name = string("b_21_pad_type_0"), val = string("valid")]; tensor b_21_strides_0 = const()[name = string("b_21_strides_0"), val = tensor([1, 1])]; tensor b_21_pad_0 = const()[name = string("b_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_21_dilations_0 = const()[name = string("b_21_dilations_0"), val = tensor([1, 1])]; int32 b_21_groups_0 = const()[name = string("b_21_groups_0"), val = int32(1)]; tensor b_21 = conv(dilations = b_21_dilations_0, groups = b_21_groups_0, pad = b_21_pad_0, pad_type = b_21_pad_type_0, strides = b_21_strides_0, weight = layers_10_feed_forward_w3_weight, x = var_3177_cast_fp16)[name = string("b_21")]; tensor var_3205 = silu(x = input_167)[name = string("op_3205")]; tensor input_169 = mul(x = var_3205, y = b_21)[name = string("input_169")]; string mlp_41_pad_type_0 = const()[name = string("mlp_41_pad_type_0"), val = string("valid")]; tensor mlp_41_strides_0 = const()[name = string("mlp_41_strides_0"), val = tensor([1, 1])]; tensor mlp_41_pad_0 = const()[name = string("mlp_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_41_dilations_0 = const()[name = string("mlp_41_dilations_0"), val = tensor([1, 1])]; int32 mlp_41_groups_0 = const()[name = string("mlp_41_groups_0"), val = int32(1)]; tensor mlp_41 = conv(dilations = mlp_41_dilations_0, groups = mlp_41_groups_0, pad = mlp_41_pad_0, pad_type = mlp_41_pad_type_0, strides = mlp_41_strides_0, weight = layers_10_feed_forward_w2_weight, x = input_169)[name = string("mlp_41")]; tensor var_3219_axes_0 = const()[name = string("op_3219_axes_0"), val = tensor([2])]; tensor var_3219 = squeeze(axes = var_3219_axes_0, x = mlp_41)[name = string("op_3219")]; tensor var_3223 = const()[name = string("op_3223"), val = tensor([0, 2, 1])]; tensor mlp_43 = transpose(perm = var_3223, x = var_3219)[name = string("transpose_74")]; tensor x_61_cast_fp16 = add(x = x_59_cast_fp16, y = mlp_43)[name = string("x_61_cast_fp16")]; fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3227_cast_fp16 = mul(x = x_61_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_3227_cast_fp16")]; int32 var_3229 = const()[name = string("op_3229"), val = int32(-1)]; bool input_171_interleave_0 = const()[name = string("input_171_interleave_0"), val = bool(false)]; tensor input_171_cast_fp16 = concat(axis = var_3229, interleave = input_171_interleave_0, values = (x_61_cast_fp16, var_3227_cast_fp16))[name = string("input_171_cast_fp16")]; tensor normed_83_axes_0 = const()[name = string("normed_83_axes_0"), val = tensor([-1])]; fp16 var_3235_to_fp16 = const()[name = string("op_3235_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_83_cast_fp16 = layer_norm(axes = normed_83_axes_0, epsilon = var_3235_to_fp16, x = input_171_cast_fp16)[name = string("normed_83_cast_fp16")]; tensor var_3238_split_sizes_0 = const()[name = string("op_3238_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3238_axis_0 = const()[name = string("op_3238_axis_0"), val = int32(-1)]; tensor var_3238_cast_fp16_0, tensor var_3238_cast_fp16_1 = split(axis = var_3238_axis_0, split_sizes = var_3238_split_sizes_0, x = normed_83_cast_fp16)[name = string("op_3238_cast_fp16")]; tensor layers_11_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_11_operator_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833716032)))]; tensor hidden_states_23_cast_fp16 = mul(x = var_3238_cast_fp16_0, y = layers_11_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_23_cast_fp16")]; tensor var_3244 = const()[name = string("op_3244"), val = tensor([0, 2, 1])]; tensor var_3247_axes_0 = const()[name = string("op_3247_axes_0"), val = tensor([2])]; tensor var_3245_cast_fp16 = transpose(perm = var_3244, x = hidden_states_23_cast_fp16)[name = string("transpose_73")]; tensor var_3247_cast_fp16 = expand_dims(axes = var_3247_axes_0, x = var_3245_cast_fp16)[name = string("op_3247_cast_fp16")]; string BCx_15_pad_type_0 = const()[name = string("BCx_15_pad_type_0"), val = string("valid")]; tensor BCx_15_strides_0 = const()[name = string("BCx_15_strides_0"), val = tensor([1, 1])]; tensor BCx_15_pad_0 = const()[name = string("BCx_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor BCx_15_dilations_0 = const()[name = string("BCx_15_dilations_0"), val = tensor([1, 1])]; int32 BCx_15_groups_0 = const()[name = string("BCx_15_groups_0"), val = int32(1)]; tensor BCx_15 = conv(dilations = BCx_15_dilations_0, groups = BCx_15_groups_0, pad = BCx_15_pad_0, pad_type = BCx_15_pad_type_0, strides = BCx_15_strides_0, weight = layers_11_conv_in_proj_weight, x = var_3247_cast_fp16)[name = string("BCx_15")]; tensor var_3264_split_sizes_0 = const()[name = string("op_3264_split_sizes_0"), val = tensor([1024, 1024, 1024])]; int32 var_3264_axis_0 = const()[name = string("op_3264_axis_0"), val = int32(1)]; tensor var_3264_0, tensor var_3264_1, tensor var_3264_2 = split(axis = var_3264_axis_0, split_sizes = var_3264_split_sizes_0, x = BCx_15)[name = string("op_3264")]; tensor Bx_15 = mul(x = var_3264_0, y = var_3264_2)[name = string("Bx_15")]; tensor var_3270_begin_0 = const()[name = string("op_3270_begin_0"), val = tensor([7, 0, 0])]; tensor var_3270_end_0 = const()[name = string("op_3270_end_0"), val = tensor([8, 1024, 3])]; tensor var_3270_end_mask_0 = const()[name = string("op_3270_end_mask_0"), val = tensor([false, true, true])]; tensor var_3270_squeeze_mask_0 = const()[name = string("op_3270_squeeze_mask_0"), val = tensor([true, false, false])]; tensor var_3270_cast_fp16 = slice_by_index(begin = var_3270_begin_0, end = var_3270_end_0, end_mask = var_3270_end_mask_0, squeeze_mask = var_3270_squeeze_mask_0, x = conv_state_in)[name = string("op_3270_cast_fp16")]; tensor var_3272_axes_0 = const()[name = string("op_3272_axes_0"), val = tensor([0])]; tensor var_3272_cast_fp16 = expand_dims(axes = var_3272_axes_0, x = var_3270_cast_fp16)[name = string("op_3272_cast_fp16")]; tensor slot_15_axes_0 = const()[name = string("slot_15_axes_0"), val = tensor([2])]; tensor slot_15_cast_fp16 = expand_dims(axes = slot_15_axes_0, x = var_3272_cast_fp16)[name = string("slot_15_cast_fp16")]; tensor live_tail_15_begin_0 = const()[name = string("live_tail_15_begin_0"), val = tensor([0, 0, 0, 1])]; tensor live_tail_15_end_0 = const()[name = string("live_tail_15_end_0"), val = tensor([1, 1024, 1, 1])]; tensor live_tail_15_end_mask_0 = const()[name = string("live_tail_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor live_tail_15_cast_fp16 = slice_by_index(begin = live_tail_15_begin_0, end = live_tail_15_end_0, end_mask = live_tail_15_end_mask_0, x = slot_15_cast_fp16)[name = string("live_tail_15_cast_fp16")]; int32 var_3281 = const()[name = string("op_3281"), val = int32(-1)]; bool new_state_15_interleave_0 = const()[name = string("new_state_15_interleave_0"), val = bool(false)]; tensor new_state_15_cast_fp16 = concat(axis = var_3281, interleave = new_state_15_interleave_0, values = (live_tail_15_cast_fp16, Bx_15))[name = string("new_state_15_cast_fp16")]; tensor var_3284_axes_0 = const()[name = string("op_3284_axes_0"), val = tensor([0])]; tensor var_3284_cast_fp16 = squeeze(axes = var_3284_axes_0, x = new_state_15_cast_fp16)[name = string("op_3284_cast_fp16")]; tensor var_3286_axes_0 = const()[name = string("op_3286_axes_0"), val = tensor([1])]; tensor var_3286_cast_fp16 = squeeze(axes = var_3286_axes_0, x = var_3284_cast_fp16)[name = string("op_3286_cast_fp16")]; string conv_out_15_pad_type_0 = const()[name = string("conv_out_15_pad_type_0"), val = string("valid")]; int32 conv_out_15_groups_0 = const()[name = string("conv_out_15_groups_0"), val = int32(1024)]; tensor conv_out_15_strides_0 = const()[name = string("conv_out_15_strides_0"), val = tensor([1, 1])]; tensor conv_out_15_pad_0 = const()[name = string("conv_out_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor conv_out_15_dilations_0 = const()[name = string("conv_out_15_dilations_0"), val = tensor([1, 1])]; tensor layers_11_conv_conv_weight_promoted_to_fp16 = const()[name = string("layers_11_conv_conv_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833718144)))]; tensor conv_out_15_cast_fp16 = conv(dilations = conv_out_15_dilations_0, groups = conv_out_15_groups_0, pad = conv_out_15_pad_0, pad_type = conv_out_15_pad_type_0, strides = conv_out_15_strides_0, weight = layers_11_conv_conv_weight_promoted_to_fp16, x = new_state_15_cast_fp16)[name = string("conv_out_15_cast_fp16")]; tensor input_175_cast_fp16 = mul(x = var_3264_1, y = conv_out_15_cast_fp16)[name = string("input_175_cast_fp16")]; string y_15_pad_type_0 = const()[name = string("y_15_pad_type_0"), val = string("valid")]; tensor y_15_strides_0 = const()[name = string("y_15_strides_0"), val = tensor([1, 1])]; tensor y_15_pad_0 = const()[name = string("y_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor y_15_dilations_0 = const()[name = string("y_15_dilations_0"), val = tensor([1, 1])]; int32 y_15_groups_0 = const()[name = string("y_15_groups_0"), val = int32(1)]; tensor layers_11_conv_out_proj_weight_promoted_to_fp16 = const()[name = string("layers_11_conv_out_proj_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833724352)))]; tensor y_15_cast_fp16 = conv(dilations = y_15_dilations_0, groups = y_15_groups_0, pad = y_15_pad_0, pad_type = y_15_pad_type_0, strides = y_15_strides_0, weight = layers_11_conv_out_proj_weight_promoted_to_fp16, x = input_175_cast_fp16)[name = string("y_15_cast_fp16")]; tensor var_3312_axes_0 = const()[name = string("op_3312_axes_0"), val = tensor([2])]; tensor var_3312_cast_fp16 = squeeze(axes = var_3312_axes_0, x = y_15_cast_fp16)[name = string("op_3312_cast_fp16")]; tensor var_3316 = const()[name = string("op_3316"), val = tensor([0, 2, 1])]; tensor op_out_23_cast_fp16 = transpose(perm = var_3316, x = var_3312_cast_fp16)[name = string("transpose_72")]; tensor x_63_cast_fp16 = add(x = x_61_cast_fp16, y = op_out_23_cast_fp16)[name = string("x_63_cast_fp16")]; fp16 const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3320_cast_fp16 = mul(x = x_63_cast_fp16, y = const_39_promoted_to_fp16)[name = string("op_3320_cast_fp16")]; int32 var_3322 = const()[name = string("op_3322"), val = int32(-1)]; bool input_177_interleave_0 = const()[name = string("input_177_interleave_0"), val = bool(false)]; tensor input_177_cast_fp16 = concat(axis = var_3322, interleave = input_177_interleave_0, values = (x_63_cast_fp16, var_3320_cast_fp16))[name = string("input_177_cast_fp16")]; tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; fp16 var_3328_to_fp16 = const()[name = string("op_3328_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_3328_to_fp16, x = input_177_cast_fp16)[name = string("normed_85_cast_fp16")]; tensor var_3331_split_sizes_0 = const()[name = string("op_3331_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3331_axis_0 = const()[name = string("op_3331_axis_0"), val = int32(-1)]; tensor var_3331_cast_fp16_0, tensor var_3331_cast_fp16_1 = split(axis = var_3331_axis_0, split_sizes = var_3331_split_sizes_0, x = normed_85_cast_fp16)[name = string("op_3331_cast_fp16")]; tensor layers_11_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_11_ffn_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(835821568)))]; tensor normed_87_cast_fp16 = mul(x = var_3331_cast_fp16_0, y = layers_11_ffn_norm_weight_promoted_to_fp16)[name = string("normed_87_cast_fp16")]; tensor var_3337 = const()[name = string("op_3337"), val = tensor([0, 2, 1])]; tensor var_3340_axes_0 = const()[name = string("op_3340_axes_0"), val = tensor([2])]; tensor var_3338_cast_fp16 = transpose(perm = var_3337, x = normed_87_cast_fp16)[name = string("transpose_71")]; tensor var_3340_cast_fp16 = expand_dims(axes = var_3340_axes_0, x = var_3338_cast_fp16)[name = string("op_3340_cast_fp16")]; string input_181_pad_type_0 = const()[name = string("input_181_pad_type_0"), val = string("valid")]; tensor input_181_strides_0 = const()[name = string("input_181_strides_0"), val = tensor([1, 1])]; tensor input_181_pad_0 = const()[name = string("input_181_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_181_dilations_0 = const()[name = string("input_181_dilations_0"), val = tensor([1, 1])]; int32 input_181_groups_0 = const()[name = string("input_181_groups_0"), val = int32(1)]; tensor input_181 = conv(dilations = input_181_dilations_0, groups = input_181_groups_0, pad = input_181_pad_0, pad_type = input_181_pad_type_0, strides = input_181_strides_0, weight = layers_11_feed_forward_w1_weight, x = var_3340_cast_fp16)[name = string("input_181")]; string b_23_pad_type_0 = const()[name = string("b_23_pad_type_0"), val = string("valid")]; tensor b_23_strides_0 = const()[name = string("b_23_strides_0"), val = tensor([1, 1])]; tensor b_23_pad_0 = const()[name = string("b_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_23_dilations_0 = const()[name = string("b_23_dilations_0"), val = tensor([1, 1])]; int32 b_23_groups_0 = const()[name = string("b_23_groups_0"), val = int32(1)]; tensor b_23 = conv(dilations = b_23_dilations_0, groups = b_23_groups_0, pad = b_23_pad_0, pad_type = b_23_pad_type_0, strides = b_23_strides_0, weight = layers_11_feed_forward_w3_weight, x = var_3340_cast_fp16)[name = string("b_23")]; tensor var_3368 = silu(x = input_181)[name = string("op_3368")]; tensor input_183 = mul(x = var_3368, y = b_23)[name = string("input_183")]; string mlp_45_pad_type_0 = const()[name = string("mlp_45_pad_type_0"), val = string("valid")]; tensor mlp_45_strides_0 = const()[name = string("mlp_45_strides_0"), val = tensor([1, 1])]; tensor mlp_45_pad_0 = const()[name = string("mlp_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_45_dilations_0 = const()[name = string("mlp_45_dilations_0"), val = tensor([1, 1])]; int32 mlp_45_groups_0 = const()[name = string("mlp_45_groups_0"), val = int32(1)]; tensor mlp_45 = conv(dilations = mlp_45_dilations_0, groups = mlp_45_groups_0, pad = mlp_45_pad_0, pad_type = mlp_45_pad_type_0, strides = mlp_45_strides_0, weight = layers_11_feed_forward_w2_weight, x = input_183)[name = string("mlp_45")]; tensor var_3382_axes_0 = const()[name = string("op_3382_axes_0"), val = tensor([2])]; tensor var_3382 = squeeze(axes = var_3382_axes_0, x = mlp_45)[name = string("op_3382")]; tensor var_3386 = const()[name = string("op_3386"), val = tensor([0, 2, 1])]; tensor mlp_47 = transpose(perm = var_3386, x = var_3382)[name = string("transpose_70")]; tensor x_65_cast_fp16 = add(x = x_63_cast_fp16, y = mlp_47)[name = string("x_65_cast_fp16")]; fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3390_cast_fp16 = mul(x = x_65_cast_fp16, y = const_40_promoted_to_fp16)[name = string("op_3390_cast_fp16")]; int32 var_3392 = const()[name = string("op_3392"), val = int32(-1)]; bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; tensor input_185_cast_fp16 = concat(axis = var_3392, interleave = input_185_interleave_0, values = (x_65_cast_fp16, var_3390_cast_fp16))[name = string("input_185_cast_fp16")]; tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; fp16 var_3398_to_fp16 = const()[name = string("op_3398_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_3398_to_fp16, x = input_185_cast_fp16)[name = string("normed_89_cast_fp16")]; tensor var_3401_split_sizes_0 = const()[name = string("op_3401_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3401_axis_0 = const()[name = string("op_3401_axis_0"), val = int32(-1)]; tensor var_3401_cast_fp16_0, tensor var_3401_cast_fp16_1 = split(axis = var_3401_axis_0, split_sizes = var_3401_split_sizes_0, x = normed_89_cast_fp16)[name = string("op_3401_cast_fp16")]; tensor layers_12_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_12_operator_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(835823680)))]; tensor hidden_states_25_cast_fp16 = mul(x = var_3401_cast_fp16_0, y = layers_12_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_25_cast_fp16")]; tensor var_3407 = const()[name = string("op_3407"), val = tensor([0, 2, 1])]; tensor var_3410_axes_0 = const()[name = string("op_3410_axes_0"), val = tensor([2])]; tensor var_3408_cast_fp16 = transpose(perm = var_3407, x = hidden_states_25_cast_fp16)[name = string("transpose_69")]; tensor var_3410_cast_fp16 = expand_dims(axes = var_3410_axes_0, x = var_3408_cast_fp16)[name = string("op_3410_cast_fp16")]; string var_3426_pad_type_0 = const()[name = string("op_3426_pad_type_0"), val = string("valid")]; tensor var_3426_strides_0 = const()[name = string("op_3426_strides_0"), val = tensor([1, 1])]; tensor var_3426_pad_0 = const()[name = string("op_3426_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3426_dilations_0 = const()[name = string("op_3426_dilations_0"), val = tensor([1, 1])]; int32 var_3426_groups_0 = const()[name = string("op_3426_groups_0"), val = int32(1)]; tensor var_3426 = conv(dilations = var_3426_dilations_0, groups = var_3426_groups_0, pad = var_3426_pad_0, pad_type = var_3426_pad_type_0, strides = var_3426_strides_0, weight = layers_12_self_attn_q_proj_weight, x = var_3410_cast_fp16)[name = string("op_3426")]; tensor var_3431 = const()[name = string("op_3431"), val = tensor([1, 16, 64, 1])]; tensor var_3432 = reshape(shape = var_3431, x = var_3426)[name = string("op_3432")]; tensor var_3437 = const()[name = string("op_3437"), val = tensor([0, 1, 3, 2])]; string var_3454_pad_type_0 = const()[name = string("op_3454_pad_type_0"), val = string("valid")]; tensor var_3454_strides_0 = const()[name = string("op_3454_strides_0"), val = tensor([1, 1])]; tensor var_3454_pad_0 = const()[name = string("op_3454_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3454_dilations_0 = const()[name = string("op_3454_dilations_0"), val = tensor([1, 1])]; int32 var_3454_groups_0 = const()[name = string("op_3454_groups_0"), val = int32(1)]; tensor var_3454 = conv(dilations = var_3454_dilations_0, groups = var_3454_groups_0, pad = var_3454_pad_0, pad_type = var_3454_pad_type_0, strides = var_3454_strides_0, weight = layers_12_self_attn_k_proj_weight, x = var_3410_cast_fp16)[name = string("op_3454")]; tensor var_3459 = const()[name = string("op_3459"), val = tensor([1, 8, 64, 1])]; tensor var_3460 = reshape(shape = var_3459, x = var_3454)[name = string("op_3460")]; tensor var_3465 = const()[name = string("op_3465"), val = tensor([0, 1, 3, 2])]; string var_3482_pad_type_0 = const()[name = string("op_3482_pad_type_0"), val = string("valid")]; tensor var_3482_strides_0 = const()[name = string("op_3482_strides_0"), val = tensor([1, 1])]; tensor var_3482_pad_0 = const()[name = string("op_3482_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3482_dilations_0 = const()[name = string("op_3482_dilations_0"), val = tensor([1, 1])]; int32 var_3482_groups_0 = const()[name = string("op_3482_groups_0"), val = int32(1)]; tensor var_3482 = conv(dilations = var_3482_dilations_0, groups = var_3482_groups_0, pad = var_3482_pad_0, pad_type = var_3482_pad_type_0, strides = var_3482_strides_0, weight = layers_12_self_attn_v_proj_weight, x = var_3410_cast_fp16)[name = string("op_3482")]; tensor var_3487 = const()[name = string("op_3487"), val = tensor([1, 8, 64, 1])]; tensor var_3488 = reshape(shape = var_3487, x = var_3482)[name = string("op_3488")]; tensor var_3493 = const()[name = string("op_3493"), val = tensor([0, 1, 3, 2])]; fp16 const_41_promoted = const()[name = string("const_41_promoted"), val = fp16(-0x1p+0)]; tensor var_3438 = transpose(perm = var_3437, x = var_3432)[name = string("transpose_68")]; tensor var_3500 = mul(x = var_3438, y = const_41_promoted)[name = string("op_3500")]; int32 var_3502 = const()[name = string("op_3502"), val = int32(-1)]; bool input_189_interleave_0 = const()[name = string("input_189_interleave_0"), val = bool(false)]; tensor input_189 = concat(axis = var_3502, interleave = input_189_interleave_0, values = (var_3438, var_3500))[name = string("input_189")]; tensor normed_91_axes_0 = const()[name = string("normed_91_axes_0"), val = tensor([-1])]; fp16 var_3508_to_fp16 = const()[name = string("op_3508_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_91_cast_fp16 = layer_norm(axes = normed_91_axes_0, epsilon = var_3508_to_fp16, x = input_189)[name = string("normed_91_cast_fp16")]; tensor var_3511_split_sizes_0 = const()[name = string("op_3511_split_sizes_0"), val = tensor([64, 64])]; int32 var_3511_axis_0 = const()[name = string("op_3511_axis_0"), val = int32(-1)]; tensor var_3511_0, tensor var_3511_1 = split(axis = var_3511_axis_0, split_sizes = var_3511_split_sizes_0, x = normed_91_cast_fp16)[name = string("op_3511")]; tensor q_17 = mul(x = var_3511_0, y = layers_12_self_attn_q_layernorm_weight)[name = string("q_17")]; fp16 const_42_promoted = const()[name = string("const_42_promoted"), val = fp16(-0x1p+0)]; tensor var_3466 = transpose(perm = var_3465, x = var_3460)[name = string("transpose_67")]; tensor var_3514 = mul(x = var_3466, y = const_42_promoted)[name = string("op_3514")]; int32 var_3516 = const()[name = string("op_3516"), val = int32(-1)]; bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; tensor input_191 = concat(axis = var_3516, interleave = input_191_interleave_0, values = (var_3466, var_3514))[name = string("input_191")]; tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; fp16 var_3522_to_fp16 = const()[name = string("op_3522_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_3522_to_fp16, x = input_191)[name = string("normed_93_cast_fp16")]; tensor var_3525_split_sizes_0 = const()[name = string("op_3525_split_sizes_0"), val = tensor([64, 64])]; int32 var_3525_axis_0 = const()[name = string("op_3525_axis_0"), val = int32(-1)]; tensor var_3525_0, tensor var_3525_1 = split(axis = var_3525_axis_0, split_sizes = var_3525_split_sizes_0, x = normed_93_cast_fp16)[name = string("op_3525")]; tensor k_17 = mul(x = var_3525_0, y = layers_12_self_attn_k_layernorm_weight)[name = string("k_17")]; tensor var_3528 = mul(x = q_17, y = cos)[name = string("op_3528")]; tensor var_3529_split_sizes_0 = const()[name = string("op_3529_split_sizes_0"), val = tensor([32, 32])]; int32 var_3529_axis_0 = const()[name = string("op_3529_axis_0"), val = int32(-1)]; tensor var_3529_0, tensor var_3529_1 = split(axis = var_3529_axis_0, split_sizes = var_3529_split_sizes_0, x = q_17)[name = string("op_3529")]; fp16 const_43_promoted = const()[name = string("const_43_promoted"), val = fp16(-0x1p+0)]; tensor var_3531 = mul(x = var_3529_1, y = const_43_promoted)[name = string("op_3531")]; int32 var_3533 = const()[name = string("op_3533"), val = int32(-1)]; bool var_3534_interleave_0 = const()[name = string("op_3534_interleave_0"), val = bool(false)]; tensor var_3534 = concat(axis = var_3533, interleave = var_3534_interleave_0, values = (var_3531, var_3529_0))[name = string("op_3534")]; tensor var_3535 = mul(x = var_3534, y = sin)[name = string("op_3535")]; tensor q_19 = add(x = var_3528, y = var_3535)[name = string("q_19")]; tensor var_3538 = mul(x = k_17, y = cos)[name = string("op_3538")]; tensor var_3539_split_sizes_0 = const()[name = string("op_3539_split_sizes_0"), val = tensor([32, 32])]; int32 var_3539_axis_0 = const()[name = string("op_3539_axis_0"), val = int32(-1)]; tensor var_3539_0, tensor var_3539_1 = split(axis = var_3539_axis_0, split_sizes = var_3539_split_sizes_0, x = k_17)[name = string("op_3539")]; fp16 const_44_promoted = const()[name = string("const_44_promoted"), val = fp16(-0x1p+0)]; tensor var_3541 = mul(x = var_3539_1, y = const_44_promoted)[name = string("op_3541")]; int32 var_3543 = const()[name = string("op_3543"), val = int32(-1)]; bool var_3544_interleave_0 = const()[name = string("op_3544_interleave_0"), val = bool(false)]; tensor var_3544 = concat(axis = var_3543, interleave = var_3544_interleave_0, values = (var_3541, var_3539_0))[name = string("op_3544")]; tensor var_3545 = mul(x = var_3544, y = sin)[name = string("op_3545")]; tensor k_19 = add(x = var_3538, y = var_3545)[name = string("k_19")]; tensor var_3550_begin_0 = const()[name = string("op_3550_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_3550_end_0 = const()[name = string("op_3550_end_0"), val = tensor([5, 8, 2048, 64])]; tensor var_3550_end_mask_0 = const()[name = string("op_3550_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3550_squeeze_mask_0 = const()[name = string("op_3550_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_3550_cast_fp16 = slice_by_index(begin = var_3550_begin_0, end = var_3550_end_0, end_mask = var_3550_end_mask_0, squeeze_mask = var_3550_squeeze_mask_0, x = coreml_update_state_19)[name = string("op_3550_cast_fp16")]; tensor K_cache_9_axes_0 = const()[name = string("K_cache_9_axes_0"), val = tensor([0])]; tensor K_cache_9_cast_fp16 = expand_dims(axes = K_cache_9_axes_0, x = var_3550_cast_fp16)[name = string("K_cache_9_cast_fp16")]; tensor var_3555_begin_0 = const()[name = string("op_3555_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_3555_end_0 = const()[name = string("op_3555_end_0"), val = tensor([11, 8, 2048, 64])]; tensor var_3555_end_mask_0 = const()[name = string("op_3555_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3555_squeeze_mask_0 = const()[name = string("op_3555_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_3555_cast_fp16 = slice_by_index(begin = var_3555_begin_0, end = var_3555_end_0, end_mask = var_3555_end_mask_0, squeeze_mask = var_3555_squeeze_mask_0, x = coreml_update_state_19)[name = string("op_3555_cast_fp16")]; tensor V_cache_9_axes_0 = const()[name = string("V_cache_9_axes_0"), val = tensor([0])]; tensor V_cache_9_cast_fp16 = expand_dims(axes = V_cache_9_axes_0, x = var_3555_cast_fp16)[name = string("V_cache_9_cast_fp16")]; tensor k_b_9_reps_0 = const()[name = string("k_b_9_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_b_9 = tile(reps = k_b_9_reps_0, x = k_19)[name = string("k_b_9")]; tensor v_b_9_reps_0 = const()[name = string("v_b_9_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor var_3494 = transpose(perm = var_3493, x = var_3488)[name = string("transpose_66")]; tensor v_b_9 = tile(reps = v_b_9_reps_0, x = var_3494)[name = string("v_b_9")]; tensor var_3563_cast_fp16 = mul(x = K_cache_9_cast_fp16, y = var_1132_cast_fp16)[name = string("op_3563_cast_fp16")]; tensor var_3564_cast_fp16 = mul(x = k_b_9, y = update_mask)[name = string("op_3564_cast_fp16")]; tensor K_new_9_cast_fp16 = add(x = var_3563_cast_fp16, y = var_3564_cast_fp16)[name = string("K_new_9_cast_fp16")]; tensor var_3570_cast_fp16 = mul(x = V_cache_9_cast_fp16, y = var_1132_cast_fp16)[name = string("op_3570_cast_fp16")]; tensor var_3571_cast_fp16 = mul(x = v_b_9, y = update_mask)[name = string("op_3571_cast_fp16")]; tensor V_new_9_cast_fp16 = add(x = var_3570_cast_fp16, y = var_3571_cast_fp16)[name = string("V_new_9_cast_fp16")]; tensor var_3575_axes_0 = const()[name = string("op_3575_axes_0"), val = tensor([0])]; tensor var_3575_cast_fp16 = squeeze(axes = var_3575_axes_0, x = K_new_9_cast_fp16)[name = string("op_3575_cast_fp16")]; tensor concat_32 = const()[name = string("concat_32"), val = tensor([4, 0, 0, 0])]; tensor concat_33 = const()[name = string("concat_33"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_32, begin_mask = kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_33, end_mask = kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_9_stride_0, update = var_3575_cast_fp16, x = coreml_update_state_19)[name = string("kv_cache_0_internal_tensor_assign_9_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_9_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_20_write_state")]; tensor coreml_update_state_20 = read_state(input = kv_cache_0)[name = string("coreml_update_state_20")]; tensor var_3582_axes_0 = const()[name = string("op_3582_axes_0"), val = tensor([0])]; tensor var_3582_cast_fp16 = squeeze(axes = var_3582_axes_0, x = V_new_9_cast_fp16)[name = string("op_3582_cast_fp16")]; tensor concat_34 = const()[name = string("concat_34"), val = tensor([10, 0, 0, 0])]; tensor concat_35 = const()[name = string("concat_35"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_34, begin_mask = kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_35, end_mask = kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_10_stride_0, update = var_3582_cast_fp16, x = coreml_update_state_20)[name = string("kv_cache_0_internal_tensor_assign_10_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_10_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_21_write_state")]; tensor coreml_update_state_21 = read_state(input = kv_cache_0)[name = string("coreml_update_state_21")]; tensor transpose_16_perm_0 = const()[name = string("transpose_16_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_8_reps_0 = const()[name = string("tile_8_reps_0"), val = tensor([2, 1, 1, 1])]; tensor transpose_16_cast_fp16 = transpose(perm = transpose_16_perm_0, x = K_new_9_cast_fp16)[name = string("transpose_65")]; tensor tile_8_cast_fp16 = tile(reps = tile_8_reps_0, x = transpose_16_cast_fp16)[name = string("tile_8_cast_fp16")]; tensor concat_36 = const()[name = string("concat_36"), val = tensor([2, 8, 1, 2048, 64])]; tensor reshape_16_cast_fp16 = reshape(shape = concat_36, x = tile_8_cast_fp16)[name = string("reshape_16_cast_fp16")]; tensor transpose_17_perm_0 = const()[name = string("transpose_17_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_37 = const()[name = string("concat_37"), val = tensor([-1, 1, 2048, 64])]; tensor transpose_17_cast_fp16 = transpose(perm = transpose_17_perm_0, x = reshape_16_cast_fp16)[name = string("transpose_64")]; tensor reshape_17_cast_fp16 = reshape(shape = concat_37, x = transpose_17_cast_fp16)[name = string("reshape_17_cast_fp16")]; tensor transpose_28_perm_0 = const()[name = string("transpose_28_perm_0"), val = tensor([1, 0, 3, 2])]; tensor transpose_18_perm_0 = const()[name = string("transpose_18_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_9_reps_0 = const()[name = string("tile_9_reps_0"), val = tensor([2, 1, 1, 1])]; tensor transpose_18_cast_fp16 = transpose(perm = transpose_18_perm_0, x = V_new_9_cast_fp16)[name = string("transpose_62")]; tensor tile_9_cast_fp16 = tile(reps = tile_9_reps_0, x = transpose_18_cast_fp16)[name = string("tile_9_cast_fp16")]; tensor concat_38 = const()[name = string("concat_38"), val = tensor([2, 8, 1, 2048, 64])]; tensor reshape_18_cast_fp16 = reshape(shape = concat_38, x = tile_9_cast_fp16)[name = string("reshape_18_cast_fp16")]; tensor transpose_19_perm_0 = const()[name = string("transpose_19_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_39 = const()[name = string("concat_39"), val = tensor([-1, 1, 2048, 64])]; tensor transpose_19_cast_fp16 = transpose(perm = transpose_19_perm_0, x = reshape_18_cast_fp16)[name = string("transpose_61")]; tensor reshape_19_cast_fp16 = reshape(shape = concat_39, x = transpose_19_cast_fp16)[name = string("reshape_19_cast_fp16")]; tensor V_e_9_perm_0 = const()[name = string("V_e_9_perm_0"), val = tensor([1, 0, 2, 3])]; bool var_3609_transpose_x_0 = const()[name = string("op_3609_transpose_x_0"), val = bool(false)]; bool var_3609_transpose_y_0 = const()[name = string("op_3609_transpose_y_0"), val = bool(false)]; tensor transpose_28_cast_fp16 = transpose(perm = transpose_28_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_63")]; tensor var_3609_cast_fp16 = matmul(transpose_x = var_3609_transpose_x_0, transpose_y = var_3609_transpose_y_0, x = q_19, y = transpose_28_cast_fp16)[name = string("op_3609_cast_fp16")]; fp16 var_3610_to_fp16 = const()[name = string("op_3610_to_fp16"), val = fp16(0x1p-3)]; tensor attn_25_cast_fp16 = mul(x = var_3609_cast_fp16, y = var_3610_to_fp16)[name = string("attn_25_cast_fp16")]; tensor attn_27_cast_fp16 = add(x = attn_25_cast_fp16, y = causal_mask)[name = string("attn_27_cast_fp16")]; int32 var_3619 = const()[name = string("op_3619"), val = int32(-1)]; tensor var_3621_cast_fp16 = softmax(axis = var_3619, x = attn_27_cast_fp16)[name = string("op_3621_cast_fp16")]; bool var_3637_transpose_x_0 = const()[name = string("op_3637_transpose_x_0"), val = bool(false)]; bool var_3637_transpose_y_0 = const()[name = string("op_3637_transpose_y_0"), val = bool(false)]; tensor V_e_9_cast_fp16 = transpose(perm = V_e_9_perm_0, x = reshape_19_cast_fp16)[name = string("transpose_60")]; tensor var_3637_cast_fp16 = matmul(transpose_x = var_3637_transpose_x_0, transpose_y = var_3637_transpose_y_0, x = var_3621_cast_fp16, y = V_e_9_cast_fp16)[name = string("op_3637_cast_fp16")]; tensor var_3647 = const()[name = string("op_3647"), val = tensor([0, 2, 1, 3])]; tensor var_3654 = const()[name = string("op_3654"), val = tensor([1, 1, -1])]; tensor var_3648 = transpose(perm = var_3647, x = var_3637_cast_fp16)[name = string("transpose_59")]; tensor out_19 = reshape(shape = var_3654, x = var_3648)[name = string("out_19")]; tensor var_3659 = const()[name = string("op_3659"), val = tensor([0, 2, 1])]; tensor squeeze_4 = const()[name = string("squeeze_4"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(835825792)))]; string var_3675_pad_type_0 = const()[name = string("op_3675_pad_type_0"), val = string("valid")]; int32 var_3675_groups_0 = const()[name = string("op_3675_groups_0"), val = int32(1)]; tensor var_3675_strides_0 = const()[name = string("op_3675_strides_0"), val = tensor([1])]; tensor var_3675_pad_0 = const()[name = string("op_3675_pad_0"), val = tensor([0, 0])]; tensor var_3675_dilations_0 = const()[name = string("op_3675_dilations_0"), val = tensor([1])]; tensor var_3660 = transpose(perm = var_3659, x = out_19)[name = string("transpose_58")]; tensor var_3675 = conv(dilations = var_3675_dilations_0, groups = var_3675_groups_0, pad = var_3675_pad_0, pad_type = var_3675_pad_type_0, strides = var_3675_strides_0, weight = squeeze_4, x = var_3660)[name = string("op_3675")]; tensor var_3679 = const()[name = string("op_3679"), val = tensor([0, 2, 1])]; tensor op_out_25 = transpose(perm = var_3679, x = var_3675)[name = string("transpose_57")]; tensor x_71_cast_fp16 = add(x = x_65_cast_fp16, y = op_out_25)[name = string("x_71_cast_fp16")]; fp16 const_45_promoted_to_fp16 = const()[name = string("const_45_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3683_cast_fp16 = mul(x = x_71_cast_fp16, y = const_45_promoted_to_fp16)[name = string("op_3683_cast_fp16")]; int32 var_3685 = const()[name = string("op_3685"), val = int32(-1)]; bool input_195_interleave_0 = const()[name = string("input_195_interleave_0"), val = bool(false)]; tensor input_195_cast_fp16 = concat(axis = var_3685, interleave = input_195_interleave_0, values = (x_71_cast_fp16, var_3683_cast_fp16))[name = string("input_195_cast_fp16")]; tensor normed_95_axes_0 = const()[name = string("normed_95_axes_0"), val = tensor([-1])]; fp16 var_3691_to_fp16 = const()[name = string("op_3691_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_95_cast_fp16 = layer_norm(axes = normed_95_axes_0, epsilon = var_3691_to_fp16, x = input_195_cast_fp16)[name = string("normed_95_cast_fp16")]; tensor var_3694_split_sizes_0 = const()[name = string("op_3694_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3694_axis_0 = const()[name = string("op_3694_axis_0"), val = int32(-1)]; tensor var_3694_cast_fp16_0, tensor var_3694_cast_fp16_1 = split(axis = var_3694_axis_0, split_sizes = var_3694_split_sizes_0, x = normed_95_cast_fp16)[name = string("op_3694_cast_fp16")]; tensor layers_12_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_12_ffn_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(837923008)))]; tensor normed_97_cast_fp16 = mul(x = var_3694_cast_fp16_0, y = layers_12_ffn_norm_weight_promoted_to_fp16)[name = string("normed_97_cast_fp16")]; tensor var_3700 = const()[name = string("op_3700"), val = tensor([0, 2, 1])]; tensor var_3703_axes_0 = const()[name = string("op_3703_axes_0"), val = tensor([2])]; tensor var_3701_cast_fp16 = transpose(perm = var_3700, x = normed_97_cast_fp16)[name = string("transpose_56")]; tensor var_3703_cast_fp16 = expand_dims(axes = var_3703_axes_0, x = var_3701_cast_fp16)[name = string("op_3703_cast_fp16")]; string input_199_pad_type_0 = const()[name = string("input_199_pad_type_0"), val = string("valid")]; tensor input_199_strides_0 = const()[name = string("input_199_strides_0"), val = tensor([1, 1])]; tensor input_199_pad_0 = const()[name = string("input_199_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_199_dilations_0 = const()[name = string("input_199_dilations_0"), val = tensor([1, 1])]; int32 input_199_groups_0 = const()[name = string("input_199_groups_0"), val = int32(1)]; tensor input_199 = conv(dilations = input_199_dilations_0, groups = input_199_groups_0, pad = input_199_pad_0, pad_type = input_199_pad_type_0, strides = input_199_strides_0, weight = layers_12_feed_forward_w1_weight, x = var_3703_cast_fp16)[name = string("input_199")]; string b_25_pad_type_0 = const()[name = string("b_25_pad_type_0"), val = string("valid")]; tensor b_25_strides_0 = const()[name = string("b_25_strides_0"), val = tensor([1, 1])]; tensor b_25_pad_0 = const()[name = string("b_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_25_dilations_0 = const()[name = string("b_25_dilations_0"), val = tensor([1, 1])]; int32 b_25_groups_0 = const()[name = string("b_25_groups_0"), val = int32(1)]; tensor b_25 = conv(dilations = b_25_dilations_0, groups = b_25_groups_0, pad = b_25_pad_0, pad_type = b_25_pad_type_0, strides = b_25_strides_0, weight = layers_12_feed_forward_w3_weight, x = var_3703_cast_fp16)[name = string("b_25")]; tensor var_3731 = silu(x = input_199)[name = string("op_3731")]; tensor input_201 = mul(x = var_3731, y = b_25)[name = string("input_201")]; string mlp_49_pad_type_0 = const()[name = string("mlp_49_pad_type_0"), val = string("valid")]; tensor mlp_49_strides_0 = const()[name = string("mlp_49_strides_0"), val = tensor([1, 1])]; tensor mlp_49_pad_0 = const()[name = string("mlp_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_49_dilations_0 = const()[name = string("mlp_49_dilations_0"), val = tensor([1, 1])]; int32 mlp_49_groups_0 = const()[name = string("mlp_49_groups_0"), val = int32(1)]; tensor mlp_49 = conv(dilations = mlp_49_dilations_0, groups = mlp_49_groups_0, pad = mlp_49_pad_0, pad_type = mlp_49_pad_type_0, strides = mlp_49_strides_0, weight = layers_12_feed_forward_w2_weight, x = input_201)[name = string("mlp_49")]; tensor var_3745_axes_0 = const()[name = string("op_3745_axes_0"), val = tensor([2])]; tensor var_3745 = squeeze(axes = var_3745_axes_0, x = mlp_49)[name = string("op_3745")]; tensor var_3749 = const()[name = string("op_3749"), val = tensor([0, 2, 1])]; tensor mlp_51 = transpose(perm = var_3749, x = var_3745)[name = string("transpose_55")]; tensor x_73_cast_fp16 = add(x = x_71_cast_fp16, y = mlp_51)[name = string("x_73_cast_fp16")]; fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3753_cast_fp16 = mul(x = x_73_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_3753_cast_fp16")]; int32 var_3755 = const()[name = string("op_3755"), val = int32(-1)]; bool input_203_interleave_0 = const()[name = string("input_203_interleave_0"), val = bool(false)]; tensor input_203_cast_fp16 = concat(axis = var_3755, interleave = input_203_interleave_0, values = (x_73_cast_fp16, var_3753_cast_fp16))[name = string("input_203_cast_fp16")]; tensor normed_99_axes_0 = const()[name = string("normed_99_axes_0"), val = tensor([-1])]; fp16 var_3761_to_fp16 = const()[name = string("op_3761_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_99_cast_fp16 = layer_norm(axes = normed_99_axes_0, epsilon = var_3761_to_fp16, x = input_203_cast_fp16)[name = string("normed_99_cast_fp16")]; tensor var_3764_split_sizes_0 = const()[name = string("op_3764_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3764_axis_0 = const()[name = string("op_3764_axis_0"), val = int32(-1)]; tensor var_3764_cast_fp16_0, tensor var_3764_cast_fp16_1 = split(axis = var_3764_axis_0, split_sizes = var_3764_split_sizes_0, x = normed_99_cast_fp16)[name = string("op_3764_cast_fp16")]; tensor layers_13_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_13_operator_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(837925120)))]; tensor hidden_states_27_cast_fp16 = mul(x = var_3764_cast_fp16_0, y = layers_13_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_27_cast_fp16")]; tensor var_3770 = const()[name = string("op_3770"), val = tensor([0, 2, 1])]; tensor var_3773_axes_0 = const()[name = string("op_3773_axes_0"), val = tensor([2])]; tensor var_3771_cast_fp16 = transpose(perm = var_3770, x = hidden_states_27_cast_fp16)[name = string("transpose_54")]; tensor var_3773_cast_fp16 = expand_dims(axes = var_3773_axes_0, x = var_3771_cast_fp16)[name = string("op_3773_cast_fp16")]; string BCx_17_pad_type_0 = const()[name = string("BCx_17_pad_type_0"), val = string("valid")]; tensor BCx_17_strides_0 = const()[name = string("BCx_17_strides_0"), val = tensor([1, 1])]; tensor BCx_17_pad_0 = const()[name = string("BCx_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor BCx_17_dilations_0 = const()[name = string("BCx_17_dilations_0"), val = tensor([1, 1])]; int32 BCx_17_groups_0 = const()[name = string("BCx_17_groups_0"), val = int32(1)]; tensor BCx_17 = conv(dilations = BCx_17_dilations_0, groups = BCx_17_groups_0, pad = BCx_17_pad_0, pad_type = BCx_17_pad_type_0, strides = BCx_17_strides_0, weight = layers_13_conv_in_proj_weight, x = var_3773_cast_fp16)[name = string("BCx_17")]; tensor var_3790_split_sizes_0 = const()[name = string("op_3790_split_sizes_0"), val = tensor([1024, 1024, 1024])]; int32 var_3790_axis_0 = const()[name = string("op_3790_axis_0"), val = int32(1)]; tensor var_3790_0, tensor var_3790_1, tensor var_3790_2 = split(axis = var_3790_axis_0, split_sizes = var_3790_split_sizes_0, x = BCx_17)[name = string("op_3790")]; tensor Bx_17 = mul(x = var_3790_0, y = var_3790_2)[name = string("Bx_17")]; tensor var_3796_begin_0 = const()[name = string("op_3796_begin_0"), val = tensor([8, 0, 0])]; tensor var_3796_end_0 = const()[name = string("op_3796_end_0"), val = tensor([9, 1024, 3])]; tensor var_3796_end_mask_0 = const()[name = string("op_3796_end_mask_0"), val = tensor([false, true, true])]; tensor var_3796_squeeze_mask_0 = const()[name = string("op_3796_squeeze_mask_0"), val = tensor([true, false, false])]; tensor var_3796_cast_fp16 = slice_by_index(begin = var_3796_begin_0, end = var_3796_end_0, end_mask = var_3796_end_mask_0, squeeze_mask = var_3796_squeeze_mask_0, x = conv_state_in)[name = string("op_3796_cast_fp16")]; tensor var_3798_axes_0 = const()[name = string("op_3798_axes_0"), val = tensor([0])]; tensor var_3798_cast_fp16 = expand_dims(axes = var_3798_axes_0, x = var_3796_cast_fp16)[name = string("op_3798_cast_fp16")]; tensor slot_17_axes_0 = const()[name = string("slot_17_axes_0"), val = tensor([2])]; tensor slot_17_cast_fp16 = expand_dims(axes = slot_17_axes_0, x = var_3798_cast_fp16)[name = string("slot_17_cast_fp16")]; tensor live_tail_17_begin_0 = const()[name = string("live_tail_17_begin_0"), val = tensor([0, 0, 0, 1])]; tensor live_tail_17_end_0 = const()[name = string("live_tail_17_end_0"), val = tensor([1, 1024, 1, 1])]; tensor live_tail_17_end_mask_0 = const()[name = string("live_tail_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor live_tail_17_cast_fp16 = slice_by_index(begin = live_tail_17_begin_0, end = live_tail_17_end_0, end_mask = live_tail_17_end_mask_0, x = slot_17_cast_fp16)[name = string("live_tail_17_cast_fp16")]; int32 var_3807 = const()[name = string("op_3807"), val = int32(-1)]; bool new_state_17_interleave_0 = const()[name = string("new_state_17_interleave_0"), val = bool(false)]; tensor new_state_17_cast_fp16 = concat(axis = var_3807, interleave = new_state_17_interleave_0, values = (live_tail_17_cast_fp16, Bx_17))[name = string("new_state_17_cast_fp16")]; tensor var_3810_axes_0 = const()[name = string("op_3810_axes_0"), val = tensor([0])]; tensor var_3810_cast_fp16 = squeeze(axes = var_3810_axes_0, x = new_state_17_cast_fp16)[name = string("op_3810_cast_fp16")]; tensor var_3812_axes_0 = const()[name = string("op_3812_axes_0"), val = tensor([1])]; tensor var_3812_cast_fp16 = squeeze(axes = var_3812_axes_0, x = var_3810_cast_fp16)[name = string("op_3812_cast_fp16")]; string conv_out_17_pad_type_0 = const()[name = string("conv_out_17_pad_type_0"), val = string("valid")]; int32 conv_out_17_groups_0 = const()[name = string("conv_out_17_groups_0"), val = int32(1024)]; tensor conv_out_17_strides_0 = const()[name = string("conv_out_17_strides_0"), val = tensor([1, 1])]; tensor conv_out_17_pad_0 = const()[name = string("conv_out_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor conv_out_17_dilations_0 = const()[name = string("conv_out_17_dilations_0"), val = tensor([1, 1])]; tensor layers_13_conv_conv_weight_promoted_to_fp16 = const()[name = string("layers_13_conv_conv_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(837927232)))]; tensor conv_out_17_cast_fp16 = conv(dilations = conv_out_17_dilations_0, groups = conv_out_17_groups_0, pad = conv_out_17_pad_0, pad_type = conv_out_17_pad_type_0, strides = conv_out_17_strides_0, weight = layers_13_conv_conv_weight_promoted_to_fp16, x = new_state_17_cast_fp16)[name = string("conv_out_17_cast_fp16")]; tensor input_207_cast_fp16 = mul(x = var_3790_1, y = conv_out_17_cast_fp16)[name = string("input_207_cast_fp16")]; string y_17_pad_type_0 = const()[name = string("y_17_pad_type_0"), val = string("valid")]; tensor y_17_strides_0 = const()[name = string("y_17_strides_0"), val = tensor([1, 1])]; tensor y_17_pad_0 = const()[name = string("y_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor y_17_dilations_0 = const()[name = string("y_17_dilations_0"), val = tensor([1, 1])]; int32 y_17_groups_0 = const()[name = string("y_17_groups_0"), val = int32(1)]; tensor layers_13_conv_out_proj_weight_promoted_to_fp16 = const()[name = string("layers_13_conv_out_proj_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(837933440)))]; tensor y_17_cast_fp16 = conv(dilations = y_17_dilations_0, groups = y_17_groups_0, pad = y_17_pad_0, pad_type = y_17_pad_type_0, strides = y_17_strides_0, weight = layers_13_conv_out_proj_weight_promoted_to_fp16, x = input_207_cast_fp16)[name = string("y_17_cast_fp16")]; tensor var_3838_axes_0 = const()[name = string("op_3838_axes_0"), val = tensor([2])]; tensor var_3838_cast_fp16 = squeeze(axes = var_3838_axes_0, x = y_17_cast_fp16)[name = string("op_3838_cast_fp16")]; tensor var_3842 = const()[name = string("op_3842"), val = tensor([0, 2, 1])]; tensor op_out_27_cast_fp16 = transpose(perm = var_3842, x = var_3838_cast_fp16)[name = string("transpose_53")]; tensor x_75_cast_fp16 = add(x = x_73_cast_fp16, y = op_out_27_cast_fp16)[name = string("x_75_cast_fp16")]; fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3846_cast_fp16 = mul(x = x_75_cast_fp16, y = const_47_promoted_to_fp16)[name = string("op_3846_cast_fp16")]; int32 var_3848 = const()[name = string("op_3848"), val = int32(-1)]; bool input_209_interleave_0 = const()[name = string("input_209_interleave_0"), val = bool(false)]; tensor input_209_cast_fp16 = concat(axis = var_3848, interleave = input_209_interleave_0, values = (x_75_cast_fp16, var_3846_cast_fp16))[name = string("input_209_cast_fp16")]; tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; fp16 var_3854_to_fp16 = const()[name = string("op_3854_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_3854_to_fp16, x = input_209_cast_fp16)[name = string("normed_101_cast_fp16")]; tensor var_3857_split_sizes_0 = const()[name = string("op_3857_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3857_axis_0 = const()[name = string("op_3857_axis_0"), val = int32(-1)]; tensor var_3857_cast_fp16_0, tensor var_3857_cast_fp16_1 = split(axis = var_3857_axis_0, split_sizes = var_3857_split_sizes_0, x = normed_101_cast_fp16)[name = string("op_3857_cast_fp16")]; tensor layers_13_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_13_ffn_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840030656)))]; tensor normed_103_cast_fp16 = mul(x = var_3857_cast_fp16_0, y = layers_13_ffn_norm_weight_promoted_to_fp16)[name = string("normed_103_cast_fp16")]; tensor var_3863 = const()[name = string("op_3863"), val = tensor([0, 2, 1])]; tensor var_3866_axes_0 = const()[name = string("op_3866_axes_0"), val = tensor([2])]; tensor var_3864_cast_fp16 = transpose(perm = var_3863, x = normed_103_cast_fp16)[name = string("transpose_52")]; tensor var_3866_cast_fp16 = expand_dims(axes = var_3866_axes_0, x = var_3864_cast_fp16)[name = string("op_3866_cast_fp16")]; string input_213_pad_type_0 = const()[name = string("input_213_pad_type_0"), val = string("valid")]; tensor input_213_strides_0 = const()[name = string("input_213_strides_0"), val = tensor([1, 1])]; tensor input_213_pad_0 = const()[name = string("input_213_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_213_dilations_0 = const()[name = string("input_213_dilations_0"), val = tensor([1, 1])]; int32 input_213_groups_0 = const()[name = string("input_213_groups_0"), val = int32(1)]; tensor input_213 = conv(dilations = input_213_dilations_0, groups = input_213_groups_0, pad = input_213_pad_0, pad_type = input_213_pad_type_0, strides = input_213_strides_0, weight = layers_13_feed_forward_w1_weight, x = var_3866_cast_fp16)[name = string("input_213")]; string b_27_pad_type_0 = const()[name = string("b_27_pad_type_0"), val = string("valid")]; tensor b_27_strides_0 = const()[name = string("b_27_strides_0"), val = tensor([1, 1])]; tensor b_27_pad_0 = const()[name = string("b_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_27_dilations_0 = const()[name = string("b_27_dilations_0"), val = tensor([1, 1])]; int32 b_27_groups_0 = const()[name = string("b_27_groups_0"), val = int32(1)]; tensor b_27 = conv(dilations = b_27_dilations_0, groups = b_27_groups_0, pad = b_27_pad_0, pad_type = b_27_pad_type_0, strides = b_27_strides_0, weight = layers_13_feed_forward_w3_weight, x = var_3866_cast_fp16)[name = string("b_27")]; tensor var_3894 = silu(x = input_213)[name = string("op_3894")]; tensor input_215 = mul(x = var_3894, y = b_27)[name = string("input_215")]; string mlp_53_pad_type_0 = const()[name = string("mlp_53_pad_type_0"), val = string("valid")]; tensor mlp_53_strides_0 = const()[name = string("mlp_53_strides_0"), val = tensor([1, 1])]; tensor mlp_53_pad_0 = const()[name = string("mlp_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_53_dilations_0 = const()[name = string("mlp_53_dilations_0"), val = tensor([1, 1])]; int32 mlp_53_groups_0 = const()[name = string("mlp_53_groups_0"), val = int32(1)]; tensor mlp_53 = conv(dilations = mlp_53_dilations_0, groups = mlp_53_groups_0, pad = mlp_53_pad_0, pad_type = mlp_53_pad_type_0, strides = mlp_53_strides_0, weight = layers_13_feed_forward_w2_weight, x = input_215)[name = string("mlp_53")]; tensor var_3908_axes_0 = const()[name = string("op_3908_axes_0"), val = tensor([2])]; tensor var_3908 = squeeze(axes = var_3908_axes_0, x = mlp_53)[name = string("op_3908")]; tensor var_3912 = const()[name = string("op_3912"), val = tensor([0, 2, 1])]; tensor mlp_55 = transpose(perm = var_3912, x = var_3908)[name = string("transpose_51")]; tensor x_77_cast_fp16 = add(x = x_75_cast_fp16, y = mlp_55)[name = string("x_77_cast_fp16")]; fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3916_cast_fp16 = mul(x = x_77_cast_fp16, y = const_48_promoted_to_fp16)[name = string("op_3916_cast_fp16")]; int32 var_3918 = const()[name = string("op_3918"), val = int32(-1)]; bool input_217_interleave_0 = const()[name = string("input_217_interleave_0"), val = bool(false)]; tensor input_217_cast_fp16 = concat(axis = var_3918, interleave = input_217_interleave_0, values = (x_77_cast_fp16, var_3916_cast_fp16))[name = string("input_217_cast_fp16")]; tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; fp16 var_3924_to_fp16 = const()[name = string("op_3924_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_3924_to_fp16, x = input_217_cast_fp16)[name = string("normed_105_cast_fp16")]; tensor var_3927_split_sizes_0 = const()[name = string("op_3927_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_3927_axis_0 = const()[name = string("op_3927_axis_0"), val = int32(-1)]; tensor var_3927_cast_fp16_0, tensor var_3927_cast_fp16_1 = split(axis = var_3927_axis_0, split_sizes = var_3927_split_sizes_0, x = normed_105_cast_fp16)[name = string("op_3927_cast_fp16")]; tensor layers_14_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_14_operator_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840032768)))]; tensor hidden_states_29_cast_fp16 = mul(x = var_3927_cast_fp16_0, y = layers_14_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor var_3933 = const()[name = string("op_3933"), val = tensor([0, 2, 1])]; tensor var_3936_axes_0 = const()[name = string("op_3936_axes_0"), val = tensor([2])]; tensor var_3934_cast_fp16 = transpose(perm = var_3933, x = hidden_states_29_cast_fp16)[name = string("transpose_50")]; tensor var_3936_cast_fp16 = expand_dims(axes = var_3936_axes_0, x = var_3934_cast_fp16)[name = string("op_3936_cast_fp16")]; string var_3952_pad_type_0 = const()[name = string("op_3952_pad_type_0"), val = string("valid")]; tensor var_3952_strides_0 = const()[name = string("op_3952_strides_0"), val = tensor([1, 1])]; tensor var_3952_pad_0 = const()[name = string("op_3952_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3952_dilations_0 = const()[name = string("op_3952_dilations_0"), val = tensor([1, 1])]; int32 var_3952_groups_0 = const()[name = string("op_3952_groups_0"), val = int32(1)]; tensor var_3952 = conv(dilations = var_3952_dilations_0, groups = var_3952_groups_0, pad = var_3952_pad_0, pad_type = var_3952_pad_type_0, strides = var_3952_strides_0, weight = layers_14_self_attn_q_proj_weight, x = var_3936_cast_fp16)[name = string("op_3952")]; tensor var_3957 = const()[name = string("op_3957"), val = tensor([1, 16, 64, 1])]; tensor var_3958 = reshape(shape = var_3957, x = var_3952)[name = string("op_3958")]; tensor var_3963 = const()[name = string("op_3963"), val = tensor([0, 1, 3, 2])]; string var_3980_pad_type_0 = const()[name = string("op_3980_pad_type_0"), val = string("valid")]; tensor var_3980_strides_0 = const()[name = string("op_3980_strides_0"), val = tensor([1, 1])]; tensor var_3980_pad_0 = const()[name = string("op_3980_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3980_dilations_0 = const()[name = string("op_3980_dilations_0"), val = tensor([1, 1])]; int32 var_3980_groups_0 = const()[name = string("op_3980_groups_0"), val = int32(1)]; tensor var_3980 = conv(dilations = var_3980_dilations_0, groups = var_3980_groups_0, pad = var_3980_pad_0, pad_type = var_3980_pad_type_0, strides = var_3980_strides_0, weight = layers_14_self_attn_k_proj_weight, x = var_3936_cast_fp16)[name = string("op_3980")]; tensor var_3985 = const()[name = string("op_3985"), val = tensor([1, 8, 64, 1])]; tensor var_3986 = reshape(shape = var_3985, x = var_3980)[name = string("op_3986")]; tensor var_3991 = const()[name = string("op_3991"), val = tensor([0, 1, 3, 2])]; string var_4008_pad_type_0 = const()[name = string("op_4008_pad_type_0"), val = string("valid")]; tensor var_4008_strides_0 = const()[name = string("op_4008_strides_0"), val = tensor([1, 1])]; tensor var_4008_pad_0 = const()[name = string("op_4008_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4008_dilations_0 = const()[name = string("op_4008_dilations_0"), val = tensor([1, 1])]; int32 var_4008_groups_0 = const()[name = string("op_4008_groups_0"), val = int32(1)]; tensor var_4008 = conv(dilations = var_4008_dilations_0, groups = var_4008_groups_0, pad = var_4008_pad_0, pad_type = var_4008_pad_type_0, strides = var_4008_strides_0, weight = layers_14_self_attn_v_proj_weight, x = var_3936_cast_fp16)[name = string("op_4008")]; tensor var_4013 = const()[name = string("op_4013"), val = tensor([1, 8, 64, 1])]; tensor var_4014 = reshape(shape = var_4013, x = var_4008)[name = string("op_4014")]; tensor var_4019 = const()[name = string("op_4019"), val = tensor([0, 1, 3, 2])]; fp16 const_49_promoted = const()[name = string("const_49_promoted"), val = fp16(-0x1p+0)]; tensor var_3964 = transpose(perm = var_3963, x = var_3958)[name = string("transpose_49")]; tensor var_4026 = mul(x = var_3964, y = const_49_promoted)[name = string("op_4026")]; int32 var_4028 = const()[name = string("op_4028"), val = int32(-1)]; bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)]; tensor input_221 = concat(axis = var_4028, interleave = input_221_interleave_0, values = (var_3964, var_4026))[name = string("input_221")]; tensor normed_107_axes_0 = const()[name = string("normed_107_axes_0"), val = tensor([-1])]; fp16 var_4034_to_fp16 = const()[name = string("op_4034_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_107_cast_fp16 = layer_norm(axes = normed_107_axes_0, epsilon = var_4034_to_fp16, x = input_221)[name = string("normed_107_cast_fp16")]; tensor var_4037_split_sizes_0 = const()[name = string("op_4037_split_sizes_0"), val = tensor([64, 64])]; int32 var_4037_axis_0 = const()[name = string("op_4037_axis_0"), val = int32(-1)]; tensor var_4037_0, tensor var_4037_1 = split(axis = var_4037_axis_0, split_sizes = var_4037_split_sizes_0, x = normed_107_cast_fp16)[name = string("op_4037")]; tensor q_21 = mul(x = var_4037_0, y = layers_14_self_attn_q_layernorm_weight)[name = string("q_21")]; fp16 const_50_promoted = const()[name = string("const_50_promoted"), val = fp16(-0x1p+0)]; tensor var_3992 = transpose(perm = var_3991, x = var_3986)[name = string("transpose_48")]; tensor var_4040 = mul(x = var_3992, y = const_50_promoted)[name = string("op_4040")]; int32 var_4042 = const()[name = string("op_4042"), val = int32(-1)]; bool input_223_interleave_0 = const()[name = string("input_223_interleave_0"), val = bool(false)]; tensor input_223 = concat(axis = var_4042, interleave = input_223_interleave_0, values = (var_3992, var_4040))[name = string("input_223")]; tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; fp16 var_4048_to_fp16 = const()[name = string("op_4048_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_4048_to_fp16, x = input_223)[name = string("normed_109_cast_fp16")]; tensor var_4051_split_sizes_0 = const()[name = string("op_4051_split_sizes_0"), val = tensor([64, 64])]; int32 var_4051_axis_0 = const()[name = string("op_4051_axis_0"), val = int32(-1)]; tensor var_4051_0, tensor var_4051_1 = split(axis = var_4051_axis_0, split_sizes = var_4051_split_sizes_0, x = normed_109_cast_fp16)[name = string("op_4051")]; tensor k_21 = mul(x = var_4051_0, y = layers_14_self_attn_k_layernorm_weight)[name = string("k_21")]; tensor var_4054 = mul(x = q_21, y = cos)[name = string("op_4054")]; tensor var_4055_split_sizes_0 = const()[name = string("op_4055_split_sizes_0"), val = tensor([32, 32])]; int32 var_4055_axis_0 = const()[name = string("op_4055_axis_0"), val = int32(-1)]; tensor var_4055_0, tensor var_4055_1 = split(axis = var_4055_axis_0, split_sizes = var_4055_split_sizes_0, x = q_21)[name = string("op_4055")]; fp16 const_51_promoted = const()[name = string("const_51_promoted"), val = fp16(-0x1p+0)]; tensor var_4057 = mul(x = var_4055_1, y = const_51_promoted)[name = string("op_4057")]; int32 var_4059 = const()[name = string("op_4059"), val = int32(-1)]; bool var_4060_interleave_0 = const()[name = string("op_4060_interleave_0"), val = bool(false)]; tensor var_4060 = concat(axis = var_4059, interleave = var_4060_interleave_0, values = (var_4057, var_4055_0))[name = string("op_4060")]; tensor var_4061 = mul(x = var_4060, y = sin)[name = string("op_4061")]; tensor q = add(x = var_4054, y = var_4061)[name = string("q")]; tensor var_4064 = mul(x = k_21, y = cos)[name = string("op_4064")]; tensor var_4065_split_sizes_0 = const()[name = string("op_4065_split_sizes_0"), val = tensor([32, 32])]; int32 var_4065_axis_0 = const()[name = string("op_4065_axis_0"), val = int32(-1)]; tensor var_4065_0, tensor var_4065_1 = split(axis = var_4065_axis_0, split_sizes = var_4065_split_sizes_0, x = k_21)[name = string("op_4065")]; fp16 const_52_promoted = const()[name = string("const_52_promoted"), val = fp16(-0x1p+0)]; tensor var_4067 = mul(x = var_4065_1, y = const_52_promoted)[name = string("op_4067")]; int32 var_4069 = const()[name = string("op_4069"), val = int32(-1)]; bool var_4070_interleave_0 = const()[name = string("op_4070_interleave_0"), val = bool(false)]; tensor var_4070 = concat(axis = var_4069, interleave = var_4070_interleave_0, values = (var_4067, var_4065_0))[name = string("op_4070")]; tensor var_4071 = mul(x = var_4070, y = sin)[name = string("op_4071")]; tensor k = add(x = var_4064, y = var_4071)[name = string("k")]; tensor var_4076_begin_0 = const()[name = string("op_4076_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_4076_end_0 = const()[name = string("op_4076_end_0"), val = tensor([6, 8, 2048, 64])]; tensor var_4076_end_mask_0 = const()[name = string("op_4076_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4076_squeeze_mask_0 = const()[name = string("op_4076_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_4076_cast_fp16 = slice_by_index(begin = var_4076_begin_0, end = var_4076_end_0, end_mask = var_4076_end_mask_0, squeeze_mask = var_4076_squeeze_mask_0, x = coreml_update_state_21)[name = string("op_4076_cast_fp16")]; tensor K_cache_axes_0 = const()[name = string("K_cache_axes_0"), val = tensor([0])]; tensor K_cache_cast_fp16 = expand_dims(axes = K_cache_axes_0, x = var_4076_cast_fp16)[name = string("K_cache_cast_fp16")]; tensor var_4081_begin_0 = const()[name = string("op_4081_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_4081_end_0 = const()[name = string("op_4081_end_0"), val = tensor([12, 8, 2048, 64])]; tensor var_4081_end_mask_0 = const()[name = string("op_4081_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4081_squeeze_mask_0 = const()[name = string("op_4081_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_4081_cast_fp16 = slice_by_index(begin = var_4081_begin_0, end = var_4081_end_0, end_mask = var_4081_end_mask_0, squeeze_mask = var_4081_squeeze_mask_0, x = coreml_update_state_21)[name = string("op_4081_cast_fp16")]; tensor V_cache_axes_0 = const()[name = string("V_cache_axes_0"), val = tensor([0])]; tensor V_cache_cast_fp16 = expand_dims(axes = V_cache_axes_0, x = var_4081_cast_fp16)[name = string("V_cache_cast_fp16")]; tensor k_b_reps_0 = const()[name = string("k_b_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_b = tile(reps = k_b_reps_0, x = k)[name = string("k_b")]; tensor v_b_reps_0 = const()[name = string("v_b_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor var_4020 = transpose(perm = var_4019, x = var_4014)[name = string("transpose_47")]; tensor v_b = tile(reps = v_b_reps_0, x = var_4020)[name = string("v_b")]; tensor var_4089_cast_fp16 = mul(x = K_cache_cast_fp16, y = var_1132_cast_fp16)[name = string("op_4089_cast_fp16")]; tensor var_4090_cast_fp16 = mul(x = k_b, y = update_mask)[name = string("op_4090_cast_fp16")]; tensor K_new_cast_fp16 = add(x = var_4089_cast_fp16, y = var_4090_cast_fp16)[name = string("K_new_cast_fp16")]; tensor var_4096_cast_fp16 = mul(x = V_cache_cast_fp16, y = var_1132_cast_fp16)[name = string("op_4096_cast_fp16")]; tensor var_4097_cast_fp16 = mul(x = v_b, y = update_mask)[name = string("op_4097_cast_fp16")]; tensor V_new_cast_fp16 = add(x = var_4096_cast_fp16, y = var_4097_cast_fp16)[name = string("V_new_cast_fp16")]; tensor var_4101_axes_0 = const()[name = string("op_4101_axes_0"), val = tensor([0])]; tensor var_4101_cast_fp16 = squeeze(axes = var_4101_axes_0, x = K_new_cast_fp16)[name = string("op_4101_cast_fp16")]; tensor concat_40 = const()[name = string("concat_40"), val = tensor([5, 0, 0, 0])]; tensor concat_41 = const()[name = string("concat_41"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_40, begin_mask = kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_41, end_mask = kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_11_stride_0, update = var_4101_cast_fp16, x = coreml_update_state_21)[name = string("kv_cache_0_internal_tensor_assign_11_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_11_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_22_write_state")]; tensor coreml_update_state_22 = read_state(input = kv_cache_0)[name = string("coreml_update_state_22")]; tensor var_4108_axes_0 = const()[name = string("op_4108_axes_0"), val = tensor([0])]; tensor var_4108_cast_fp16 = squeeze(axes = var_4108_axes_0, x = V_new_cast_fp16)[name = string("op_4108_cast_fp16")]; tensor concat_42 = const()[name = string("concat_42"), val = tensor([11, 0, 0, 0])]; tensor concat_43 = const()[name = string("concat_43"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_42, begin_mask = kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_43, end_mask = kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_12_stride_0, update = var_4108_cast_fp16, x = coreml_update_state_22)[name = string("kv_cache_0_internal_tensor_assign_12_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_12_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_23_write_state")]; tensor transpose_20_perm_0 = const()[name = string("transpose_20_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_10_reps_0 = const()[name = string("tile_10_reps_0"), val = tensor([2, 1, 1, 1])]; tensor transpose_20_cast_fp16 = transpose(perm = transpose_20_perm_0, x = K_new_cast_fp16)[name = string("transpose_46")]; tensor tile_10_cast_fp16 = tile(reps = tile_10_reps_0, x = transpose_20_cast_fp16)[name = string("tile_10_cast_fp16")]; tensor concat_44 = const()[name = string("concat_44"), val = tensor([2, 8, 1, 2048, 64])]; tensor reshape_20_cast_fp16 = reshape(shape = concat_44, x = tile_10_cast_fp16)[name = string("reshape_20_cast_fp16")]; tensor transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_45 = const()[name = string("concat_45"), val = tensor([-1, 1, 2048, 64])]; tensor transpose_21_cast_fp16 = transpose(perm = transpose_21_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_45")]; tensor reshape_21_cast_fp16 = reshape(shape = concat_45, x = transpose_21_cast_fp16)[name = string("reshape_21_cast_fp16")]; tensor transpose_29_perm_0 = const()[name = string("transpose_29_perm_0"), val = tensor([1, 0, 3, 2])]; tensor transpose_22_perm_0 = const()[name = string("transpose_22_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_11_reps_0 = const()[name = string("tile_11_reps_0"), val = tensor([2, 1, 1, 1])]; tensor transpose_22_cast_fp16 = transpose(perm = transpose_22_perm_0, x = V_new_cast_fp16)[name = string("transpose_43")]; tensor tile_11_cast_fp16 = tile(reps = tile_11_reps_0, x = transpose_22_cast_fp16)[name = string("tile_11_cast_fp16")]; tensor concat_46 = const()[name = string("concat_46"), val = tensor([2, 8, 1, 2048, 64])]; tensor reshape_22_cast_fp16 = reshape(shape = concat_46, x = tile_11_cast_fp16)[name = string("reshape_22_cast_fp16")]; tensor transpose_23_perm_0 = const()[name = string("transpose_23_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_47 = const()[name = string("concat_47"), val = tensor([-1, 1, 2048, 64])]; tensor transpose_23_cast_fp16 = transpose(perm = transpose_23_perm_0, x = reshape_22_cast_fp16)[name = string("transpose_42")]; tensor reshape_23_cast_fp16 = reshape(shape = concat_47, x = transpose_23_cast_fp16)[name = string("reshape_23_cast_fp16")]; tensor V_e_perm_0 = const()[name = string("V_e_perm_0"), val = tensor([1, 0, 2, 3])]; bool var_4135_transpose_x_0 = const()[name = string("op_4135_transpose_x_0"), val = bool(false)]; bool var_4135_transpose_y_0 = const()[name = string("op_4135_transpose_y_0"), val = bool(false)]; tensor transpose_29_cast_fp16 = transpose(perm = transpose_29_perm_0, x = reshape_21_cast_fp16)[name = string("transpose_44")]; tensor var_4135_cast_fp16 = matmul(transpose_x = var_4135_transpose_x_0, transpose_y = var_4135_transpose_y_0, x = q, y = transpose_29_cast_fp16)[name = string("op_4135_cast_fp16")]; fp16 var_4136_to_fp16 = const()[name = string("op_4136_to_fp16"), val = fp16(0x1p-3)]; tensor attn_31_cast_fp16 = mul(x = var_4135_cast_fp16, y = var_4136_to_fp16)[name = string("attn_31_cast_fp16")]; tensor attn_33_cast_fp16 = add(x = attn_31_cast_fp16, y = causal_mask)[name = string("attn_33_cast_fp16")]; int32 var_4145 = const()[name = string("op_4145"), val = int32(-1)]; tensor var_4147_cast_fp16 = softmax(axis = var_4145, x = attn_33_cast_fp16)[name = string("op_4147_cast_fp16")]; bool var_4163_transpose_x_0 = const()[name = string("op_4163_transpose_x_0"), val = bool(false)]; bool var_4163_transpose_y_0 = const()[name = string("op_4163_transpose_y_0"), val = bool(false)]; tensor V_e_cast_fp16 = transpose(perm = V_e_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_41")]; tensor var_4163_cast_fp16 = matmul(transpose_x = var_4163_transpose_x_0, transpose_y = var_4163_transpose_y_0, x = var_4147_cast_fp16, y = V_e_cast_fp16)[name = string("op_4163_cast_fp16")]; tensor var_4173 = const()[name = string("op_4173"), val = tensor([0, 2, 1, 3])]; tensor var_4180 = const()[name = string("op_4180"), val = tensor([1, 1, -1])]; tensor var_4174 = transpose(perm = var_4173, x = var_4163_cast_fp16)[name = string("transpose_40")]; tensor out = reshape(shape = var_4180, x = var_4174)[name = string("out")]; tensor var_4185 = const()[name = string("op_4185"), val = tensor([0, 2, 1])]; tensor squeeze_5 = const()[name = string("squeeze_5"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840034880)))]; string var_4201_pad_type_0 = const()[name = string("op_4201_pad_type_0"), val = string("valid")]; int32 var_4201_groups_0 = const()[name = string("op_4201_groups_0"), val = int32(1)]; tensor var_4201_strides_0 = const()[name = string("op_4201_strides_0"), val = tensor([1])]; tensor var_4201_pad_0 = const()[name = string("op_4201_pad_0"), val = tensor([0, 0])]; tensor var_4201_dilations_0 = const()[name = string("op_4201_dilations_0"), val = tensor([1])]; tensor var_4186 = transpose(perm = var_4185, x = out)[name = string("transpose_39")]; tensor var_4201 = conv(dilations = var_4201_dilations_0, groups = var_4201_groups_0, pad = var_4201_pad_0, pad_type = var_4201_pad_type_0, strides = var_4201_strides_0, weight = squeeze_5, x = var_4186)[name = string("op_4201")]; tensor var_4205 = const()[name = string("op_4205"), val = tensor([0, 2, 1])]; tensor op_out_29 = transpose(perm = var_4205, x = var_4201)[name = string("transpose_38")]; tensor x_83_cast_fp16 = add(x = x_77_cast_fp16, y = op_out_29)[name = string("x_83_cast_fp16")]; fp16 const_53_promoted_to_fp16 = const()[name = string("const_53_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4209_cast_fp16 = mul(x = x_83_cast_fp16, y = const_53_promoted_to_fp16)[name = string("op_4209_cast_fp16")]; int32 var_4211 = const()[name = string("op_4211"), val = int32(-1)]; bool input_227_interleave_0 = const()[name = string("input_227_interleave_0"), val = bool(false)]; tensor input_227_cast_fp16 = concat(axis = var_4211, interleave = input_227_interleave_0, values = (x_83_cast_fp16, var_4209_cast_fp16))[name = string("input_227_cast_fp16")]; tensor normed_111_axes_0 = const()[name = string("normed_111_axes_0"), val = tensor([-1])]; fp16 var_4217_to_fp16 = const()[name = string("op_4217_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_111_cast_fp16 = layer_norm(axes = normed_111_axes_0, epsilon = var_4217_to_fp16, x = input_227_cast_fp16)[name = string("normed_111_cast_fp16")]; tensor var_4220_split_sizes_0 = const()[name = string("op_4220_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4220_axis_0 = const()[name = string("op_4220_axis_0"), val = int32(-1)]; tensor var_4220_cast_fp16_0, tensor var_4220_cast_fp16_1 = split(axis = var_4220_axis_0, split_sizes = var_4220_split_sizes_0, x = normed_111_cast_fp16)[name = string("op_4220_cast_fp16")]; tensor layers_14_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_14_ffn_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(842132096)))]; tensor normed_113_cast_fp16 = mul(x = var_4220_cast_fp16_0, y = layers_14_ffn_norm_weight_promoted_to_fp16)[name = string("normed_113_cast_fp16")]; tensor var_4226 = const()[name = string("op_4226"), val = tensor([0, 2, 1])]; tensor var_4229_axes_0 = const()[name = string("op_4229_axes_0"), val = tensor([2])]; tensor var_4227_cast_fp16 = transpose(perm = var_4226, x = normed_113_cast_fp16)[name = string("transpose_37")]; tensor var_4229_cast_fp16 = expand_dims(axes = var_4229_axes_0, x = var_4227_cast_fp16)[name = string("op_4229_cast_fp16")]; string input_231_pad_type_0 = const()[name = string("input_231_pad_type_0"), val = string("valid")]; tensor input_231_strides_0 = const()[name = string("input_231_strides_0"), val = tensor([1, 1])]; tensor input_231_pad_0 = const()[name = string("input_231_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_231_dilations_0 = const()[name = string("input_231_dilations_0"), val = tensor([1, 1])]; int32 input_231_groups_0 = const()[name = string("input_231_groups_0"), val = int32(1)]; tensor input_231 = conv(dilations = input_231_dilations_0, groups = input_231_groups_0, pad = input_231_pad_0, pad_type = input_231_pad_type_0, strides = input_231_strides_0, weight = layers_14_feed_forward_w1_weight, x = var_4229_cast_fp16)[name = string("input_231")]; string b_29_pad_type_0 = const()[name = string("b_29_pad_type_0"), val = string("valid")]; tensor b_29_strides_0 = const()[name = string("b_29_strides_0"), val = tensor([1, 1])]; tensor b_29_pad_0 = const()[name = string("b_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_29_dilations_0 = const()[name = string("b_29_dilations_0"), val = tensor([1, 1])]; int32 b_29_groups_0 = const()[name = string("b_29_groups_0"), val = int32(1)]; tensor b_29 = conv(dilations = b_29_dilations_0, groups = b_29_groups_0, pad = b_29_pad_0, pad_type = b_29_pad_type_0, strides = b_29_strides_0, weight = layers_14_feed_forward_w3_weight, x = var_4229_cast_fp16)[name = string("b_29")]; tensor var_4257 = silu(x = input_231)[name = string("op_4257")]; tensor input_233 = mul(x = var_4257, y = b_29)[name = string("input_233")]; string mlp_57_pad_type_0 = const()[name = string("mlp_57_pad_type_0"), val = string("valid")]; tensor mlp_57_strides_0 = const()[name = string("mlp_57_strides_0"), val = tensor([1, 1])]; tensor mlp_57_pad_0 = const()[name = string("mlp_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_57_dilations_0 = const()[name = string("mlp_57_dilations_0"), val = tensor([1, 1])]; int32 mlp_57_groups_0 = const()[name = string("mlp_57_groups_0"), val = int32(1)]; tensor mlp_57 = conv(dilations = mlp_57_dilations_0, groups = mlp_57_groups_0, pad = mlp_57_pad_0, pad_type = mlp_57_pad_type_0, strides = mlp_57_strides_0, weight = layers_14_feed_forward_w2_weight, x = input_233)[name = string("mlp_57")]; tensor var_4271_axes_0 = const()[name = string("op_4271_axes_0"), val = tensor([2])]; tensor var_4271 = squeeze(axes = var_4271_axes_0, x = mlp_57)[name = string("op_4271")]; tensor var_4275 = const()[name = string("op_4275"), val = tensor([0, 2, 1])]; tensor mlp_59 = transpose(perm = var_4275, x = var_4271)[name = string("transpose_36")]; tensor x_85_cast_fp16 = add(x = x_83_cast_fp16, y = mlp_59)[name = string("x_85_cast_fp16")]; fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4279_cast_fp16 = mul(x = x_85_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_4279_cast_fp16")]; int32 var_4281 = const()[name = string("op_4281"), val = int32(-1)]; bool input_235_interleave_0 = const()[name = string("input_235_interleave_0"), val = bool(false)]; tensor input_235_cast_fp16 = concat(axis = var_4281, interleave = input_235_interleave_0, values = (x_85_cast_fp16, var_4279_cast_fp16))[name = string("input_235_cast_fp16")]; tensor normed_115_axes_0 = const()[name = string("normed_115_axes_0"), val = tensor([-1])]; fp16 var_4287_to_fp16 = const()[name = string("op_4287_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_115_cast_fp16 = layer_norm(axes = normed_115_axes_0, epsilon = var_4287_to_fp16, x = input_235_cast_fp16)[name = string("normed_115_cast_fp16")]; tensor var_4290_split_sizes_0 = const()[name = string("op_4290_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4290_axis_0 = const()[name = string("op_4290_axis_0"), val = int32(-1)]; tensor var_4290_cast_fp16_0, tensor var_4290_cast_fp16_1 = split(axis = var_4290_axis_0, split_sizes = var_4290_split_sizes_0, x = normed_115_cast_fp16)[name = string("op_4290_cast_fp16")]; tensor layers_15_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_15_operator_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(842134208)))]; tensor hidden_states_31_cast_fp16 = mul(x = var_4290_cast_fp16_0, y = layers_15_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_31_cast_fp16")]; tensor var_4296 = const()[name = string("op_4296"), val = tensor([0, 2, 1])]; tensor var_4299_axes_0 = const()[name = string("op_4299_axes_0"), val = tensor([2])]; tensor var_4297_cast_fp16 = transpose(perm = var_4296, x = hidden_states_31_cast_fp16)[name = string("transpose_35")]; tensor var_4299_cast_fp16 = expand_dims(axes = var_4299_axes_0, x = var_4297_cast_fp16)[name = string("op_4299_cast_fp16")]; string BCx_pad_type_0 = const()[name = string("BCx_pad_type_0"), val = string("valid")]; tensor BCx_strides_0 = const()[name = string("BCx_strides_0"), val = tensor([1, 1])]; tensor BCx_pad_0 = const()[name = string("BCx_pad_0"), val = tensor([0, 0, 0, 0])]; tensor BCx_dilations_0 = const()[name = string("BCx_dilations_0"), val = tensor([1, 1])]; int32 BCx_groups_0 = const()[name = string("BCx_groups_0"), val = int32(1)]; tensor BCx = conv(dilations = BCx_dilations_0, groups = BCx_groups_0, pad = BCx_pad_0, pad_type = BCx_pad_type_0, strides = BCx_strides_0, weight = layers_15_conv_in_proj_weight, x = var_4299_cast_fp16)[name = string("BCx")]; tensor var_4316_split_sizes_0 = const()[name = string("op_4316_split_sizes_0"), val = tensor([1024, 1024, 1024])]; int32 var_4316_axis_0 = const()[name = string("op_4316_axis_0"), val = int32(1)]; tensor var_4316_0, tensor var_4316_1, tensor var_4316_2 = split(axis = var_4316_axis_0, split_sizes = var_4316_split_sizes_0, x = BCx)[name = string("op_4316")]; tensor Bx = mul(x = var_4316_0, y = var_4316_2)[name = string("Bx")]; tensor var_4322_begin_0 = const()[name = string("op_4322_begin_0"), val = tensor([9, 0, 0])]; tensor var_4322_end_0 = const()[name = string("op_4322_end_0"), val = tensor([10, 1024, 3])]; tensor var_4322_end_mask_0 = const()[name = string("op_4322_end_mask_0"), val = tensor([false, true, true])]; tensor var_4322_squeeze_mask_0 = const()[name = string("op_4322_squeeze_mask_0"), val = tensor([true, false, false])]; tensor var_4322_cast_fp16 = slice_by_index(begin = var_4322_begin_0, end = var_4322_end_0, end_mask = var_4322_end_mask_0, squeeze_mask = var_4322_squeeze_mask_0, x = conv_state_in)[name = string("op_4322_cast_fp16")]; tensor var_4324_axes_0 = const()[name = string("op_4324_axes_0"), val = tensor([0])]; tensor var_4324_cast_fp16 = expand_dims(axes = var_4324_axes_0, x = var_4322_cast_fp16)[name = string("op_4324_cast_fp16")]; tensor slot_axes_0 = const()[name = string("slot_axes_0"), val = tensor([2])]; tensor slot_cast_fp16 = expand_dims(axes = slot_axes_0, x = var_4324_cast_fp16)[name = string("slot_cast_fp16")]; tensor live_tail_begin_0 = const()[name = string("live_tail_begin_0"), val = tensor([0, 0, 0, 1])]; tensor live_tail_end_0 = const()[name = string("live_tail_end_0"), val = tensor([1, 1024, 1, 1])]; tensor live_tail_end_mask_0 = const()[name = string("live_tail_end_mask_0"), val = tensor([true, true, true, true])]; tensor live_tail_cast_fp16 = slice_by_index(begin = live_tail_begin_0, end = live_tail_end_0, end_mask = live_tail_end_mask_0, x = slot_cast_fp16)[name = string("live_tail_cast_fp16")]; int32 var_4333 = const()[name = string("op_4333"), val = int32(-1)]; bool new_state_interleave_0 = const()[name = string("new_state_interleave_0"), val = bool(false)]; tensor new_state_cast_fp16 = concat(axis = var_4333, interleave = new_state_interleave_0, values = (live_tail_cast_fp16, Bx))[name = string("new_state_cast_fp16")]; tensor var_4336_axes_0 = const()[name = string("op_4336_axes_0"), val = tensor([0])]; tensor var_4336_cast_fp16 = squeeze(axes = var_4336_axes_0, x = new_state_cast_fp16)[name = string("op_4336_cast_fp16")]; tensor new_slot_axes_0 = const()[name = string("new_slot_axes_0"), val = tensor([1])]; tensor new_slot_cast_fp16 = squeeze(axes = new_slot_axes_0, x = var_4336_cast_fp16)[name = string("new_slot_cast_fp16")]; string conv_out_pad_type_0 = const()[name = string("conv_out_pad_type_0"), val = string("valid")]; int32 conv_out_groups_0 = const()[name = string("conv_out_groups_0"), val = int32(1024)]; tensor conv_out_strides_0 = const()[name = string("conv_out_strides_0"), val = tensor([1, 1])]; tensor conv_out_pad_0 = const()[name = string("conv_out_pad_0"), val = tensor([0, 0, 0, 0])]; tensor conv_out_dilations_0 = const()[name = string("conv_out_dilations_0"), val = tensor([1, 1])]; tensor layers_15_conv_conv_weight_promoted_to_fp16 = const()[name = string("layers_15_conv_conv_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(842136320)))]; tensor conv_out_cast_fp16 = conv(dilations = conv_out_dilations_0, groups = conv_out_groups_0, pad = conv_out_pad_0, pad_type = conv_out_pad_type_0, strides = conv_out_strides_0, weight = layers_15_conv_conv_weight_promoted_to_fp16, x = new_state_cast_fp16)[name = string("conv_out_cast_fp16")]; tensor input_239_cast_fp16 = mul(x = var_4316_1, y = conv_out_cast_fp16)[name = string("input_239_cast_fp16")]; string y_pad_type_0 = const()[name = string("y_pad_type_0"), val = string("valid")]; tensor y_strides_0 = const()[name = string("y_strides_0"), val = tensor([1, 1])]; tensor y_pad_0 = const()[name = string("y_pad_0"), val = tensor([0, 0, 0, 0])]; tensor y_dilations_0 = const()[name = string("y_dilations_0"), val = tensor([1, 1])]; int32 y_groups_0 = const()[name = string("y_groups_0"), val = int32(1)]; tensor layers_15_conv_out_proj_weight_promoted_to_fp16 = const()[name = string("layers_15_conv_out_proj_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(842142528)))]; tensor y_cast_fp16 = conv(dilations = y_dilations_0, groups = y_groups_0, pad = y_pad_0, pad_type = y_pad_type_0, strides = y_strides_0, weight = layers_15_conv_out_proj_weight_promoted_to_fp16, x = input_239_cast_fp16)[name = string("y_cast_fp16")]; tensor var_4364_axes_0 = const()[name = string("op_4364_axes_0"), val = tensor([2])]; tensor var_4364_cast_fp16 = squeeze(axes = var_4364_axes_0, x = y_cast_fp16)[name = string("op_4364_cast_fp16")]; tensor var_4368 = const()[name = string("op_4368"), val = tensor([0, 2, 1])]; tensor op_out_cast_fp16 = transpose(perm = var_4368, x = var_4364_cast_fp16)[name = string("transpose_34")]; tensor x_87_cast_fp16 = add(x = x_85_cast_fp16, y = op_out_cast_fp16)[name = string("x_87_cast_fp16")]; fp16 const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4372_cast_fp16 = mul(x = x_87_cast_fp16, y = const_55_promoted_to_fp16)[name = string("op_4372_cast_fp16")]; int32 var_4374 = const()[name = string("op_4374"), val = int32(-1)]; bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; tensor input_241_cast_fp16 = concat(axis = var_4374, interleave = input_241_interleave_0, values = (x_87_cast_fp16, var_4372_cast_fp16))[name = string("input_241_cast_fp16")]; tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; fp16 var_4380_to_fp16 = const()[name = string("op_4380_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_4380_to_fp16, x = input_241_cast_fp16)[name = string("normed_117_cast_fp16")]; tensor var_4383_split_sizes_0 = const()[name = string("op_4383_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4383_axis_0 = const()[name = string("op_4383_axis_0"), val = int32(-1)]; tensor var_4383_cast_fp16_0, tensor var_4383_cast_fp16_1 = split(axis = var_4383_axis_0, split_sizes = var_4383_split_sizes_0, x = normed_117_cast_fp16)[name = string("op_4383_cast_fp16")]; tensor layers_15_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_15_ffn_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(844239744)))]; tensor normed_119_cast_fp16 = mul(x = var_4383_cast_fp16_0, y = layers_15_ffn_norm_weight_promoted_to_fp16)[name = string("normed_119_cast_fp16")]; tensor var_4389 = const()[name = string("op_4389"), val = tensor([0, 2, 1])]; tensor var_4392_axes_0 = const()[name = string("op_4392_axes_0"), val = tensor([2])]; tensor var_4390_cast_fp16 = transpose(perm = var_4389, x = normed_119_cast_fp16)[name = string("transpose_33")]; tensor var_4392_cast_fp16 = expand_dims(axes = var_4392_axes_0, x = var_4390_cast_fp16)[name = string("op_4392_cast_fp16")]; string input_245_pad_type_0 = const()[name = string("input_245_pad_type_0"), val = string("valid")]; tensor input_245_strides_0 = const()[name = string("input_245_strides_0"), val = tensor([1, 1])]; tensor input_245_pad_0 = const()[name = string("input_245_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_245_dilations_0 = const()[name = string("input_245_dilations_0"), val = tensor([1, 1])]; int32 input_245_groups_0 = const()[name = string("input_245_groups_0"), val = int32(1)]; tensor input_245 = conv(dilations = input_245_dilations_0, groups = input_245_groups_0, pad = input_245_pad_0, pad_type = input_245_pad_type_0, strides = input_245_strides_0, weight = layers_15_feed_forward_w1_weight, x = var_4392_cast_fp16)[name = string("input_245")]; string b_pad_type_0 = const()[name = string("b_pad_type_0"), val = string("valid")]; tensor b_strides_0 = const()[name = string("b_strides_0"), val = tensor([1, 1])]; tensor b_pad_0 = const()[name = string("b_pad_0"), val = tensor([0, 0, 0, 0])]; tensor b_dilations_0 = const()[name = string("b_dilations_0"), val = tensor([1, 1])]; int32 b_groups_0 = const()[name = string("b_groups_0"), val = int32(1)]; tensor b = conv(dilations = b_dilations_0, groups = b_groups_0, pad = b_pad_0, pad_type = b_pad_type_0, strides = b_strides_0, weight = layers_15_feed_forward_w3_weight, x = var_4392_cast_fp16)[name = string("b")]; tensor var_4420 = silu(x = input_245)[name = string("op_4420")]; tensor input_247 = mul(x = var_4420, y = b)[name = string("input_247")]; string mlp_61_pad_type_0 = const()[name = string("mlp_61_pad_type_0"), val = string("valid")]; tensor mlp_61_strides_0 = const()[name = string("mlp_61_strides_0"), val = tensor([1, 1])]; tensor mlp_61_pad_0 = const()[name = string("mlp_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_61_dilations_0 = const()[name = string("mlp_61_dilations_0"), val = tensor([1, 1])]; int32 mlp_61_groups_0 = const()[name = string("mlp_61_groups_0"), val = int32(1)]; tensor mlp_61 = conv(dilations = mlp_61_dilations_0, groups = mlp_61_groups_0, pad = mlp_61_pad_0, pad_type = mlp_61_pad_type_0, strides = mlp_61_strides_0, weight = layers_15_feed_forward_w2_weight, x = input_247)[name = string("mlp_61")]; tensor var_4434_axes_0 = const()[name = string("op_4434_axes_0"), val = tensor([2])]; tensor var_4434 = squeeze(axes = var_4434_axes_0, x = mlp_61)[name = string("op_4434")]; tensor var_4438 = const()[name = string("op_4438"), val = tensor([0, 2, 1])]; tensor mlp = transpose(perm = var_4438, x = var_4434)[name = string("transpose_32")]; tensor x_cast_fp16 = add(x = x_87_cast_fp16, y = mlp)[name = string("x_cast_fp16")]; fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4442_cast_fp16 = mul(x = x_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_4442_cast_fp16")]; int32 var_4444 = const()[name = string("op_4444"), val = int32(-1)]; bool input_249_interleave_0 = const()[name = string("input_249_interleave_0"), val = bool(false)]; tensor input_249_cast_fp16 = concat(axis = var_4444, interleave = input_249_interleave_0, values = (x_cast_fp16, var_4442_cast_fp16))[name = string("input_249_cast_fp16")]; tensor normed_axes_0 = const()[name = string("normed_axes_0"), val = tensor([-1])]; fp16 var_4450_to_fp16 = const()[name = string("op_4450_to_fp16"), val = fp16(0x1.5p-17)]; tensor normed_cast_fp16 = layer_norm(axes = normed_axes_0, epsilon = var_4450_to_fp16, x = input_249_cast_fp16)[name = string("normed_cast_fp16")]; tensor var_4453_split_sizes_0 = const()[name = string("op_4453_split_sizes_0"), val = tensor([1024, 1024])]; int32 var_4453_axis_0 = const()[name = string("op_4453_axis_0"), val = int32(-1)]; tensor var_4453_cast_fp16_0, tensor var_4453_cast_fp16_1 = split(axis = var_4453_axis_0, split_sizes = var_4453_split_sizes_0, x = normed_cast_fp16)[name = string("op_4453_cast_fp16")]; tensor embedding_norm_weight_promoted_to_fp16 = const()[name = string("embedding_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(844241856)))]; tensor hidden_states_cast_fp16 = mul(x = var_4453_cast_fp16_0, y = embedding_norm_weight_promoted_to_fp16)[name = string("hidden_states_cast_fp16")]; tensor var_4459 = const()[name = string("op_4459"), val = tensor([0, 2, 1])]; tensor var_4462_axes_0 = const()[name = string("op_4462_axes_0"), val = tensor([2])]; tensor var_4460_cast_fp16 = transpose(perm = var_4459, x = hidden_states_cast_fp16)[name = string("transpose_31")]; tensor var_4462_cast_fp16 = expand_dims(axes = var_4462_axes_0, x = var_4460_cast_fp16)[name = string("op_4462_cast_fp16")]; string var_4478_pad_type_0 = const()[name = string("op_4478_pad_type_0"), val = string("valid")]; tensor var_4478_strides_0 = const()[name = string("op_4478_strides_0"), val = tensor([1, 1])]; tensor var_4478_pad_0 = const()[name = string("op_4478_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4478_dilations_0 = const()[name = string("op_4478_dilations_0"), val = tensor([1, 1])]; int32 var_4478_groups_0 = const()[name = string("op_4478_groups_0"), val = int32(1)]; tensor var_4478 = conv(dilations = var_4478_dilations_0, groups = var_4478_groups_0, pad = var_4478_pad_0, pad_type = var_4478_pad_type_0, strides = var_4478_strides_0, weight = lm_head_weight, x = var_4462_cast_fp16)[name = string("op_4478")]; tensor var_4480_axes_0 = const()[name = string("op_4480_axes_0"), val = tensor([2])]; tensor var_4480 = squeeze(axes = var_4480_axes_0, x = var_4478)[name = string("op_4480")]; tensor var_4484 = const()[name = string("op_4484"), val = tensor([0, 2, 1])]; tensor logits_2d_axes_0 = const()[name = string("logits_2d_axes_0"), val = tensor([0])]; tensor logits = transpose(perm = var_4484, x = var_4480)[name = string("transpose_30")]; tensor logits_2d = squeeze(axes = logits_2d_axes_0, x = logits)[name = string("logits_2d")]; int32 token_id_axis_0 = const()[name = string("token_id_axis_0"), val = int32(-1)]; bool token_id_keep_dims_0 = const()[name = string("token_id_keep_dims_0"), val = bool(false)]; string token_id_output_dtype_0 = const()[name = string("token_id_output_dtype_0"), val = string("int32")]; tensor token_id = reduce_argmax(axis = token_id_axis_0, keep_dims = token_id_keep_dims_0, output_dtype = token_id_output_dtype_0, x = logits_2d)[name = string("token_id")]; tensor var_4492_axes_0 = const()[name = string("op_4492_axes_0"), val = tensor([-1])]; tensor var_4492 = expand_dims(axes = var_4492_axes_0, x = token_id)[name = string("op_4492")]; int32 var_4493 = const()[name = string("op_4493"), val = int32(-1)]; bool var_4495_validate_indices_0 = const()[name = string("op_4495_validate_indices_0"), val = bool(false)]; tensor var_4495 = gather_along_axis(axis = var_4493, indices = var_4492, validate_indices = var_4495_validate_indices_0, x = logits_2d)[name = string("op_4495")]; tensor var_4497_axes_0 = const()[name = string("op_4497_axes_0"), val = tensor([-1])]; tensor token_logit = squeeze(axes = var_4497_axes_0, x = var_4495)[name = string("op_4497")]; int32 var_4500_axis_0 = const()[name = string("op_4500_axis_0"), val = int32(0)]; tensor conv_state_out = stack(axis = var_4500_axis_0, values = (var_693_cast_fp16, var_856_cast_fp16, var_1382_cast_fp16, var_1545_cast_fp16, var_2071_cast_fp16, var_2234_cast_fp16, var_2760_cast_fp16, var_3286_cast_fp16, var_3812_cast_fp16, new_slot_cast_fp16))[name = string("op_4500_cast_fp16")]; } -> (token_id, token_logit, conv_state_out); }